diff options
Diffstat (limited to 'src')
46 files changed, 2022 insertions, 80 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index a0ae07752..d25a1a645 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -42,6 +42,7 @@ add_library(audio_core STATIC voice_context.h $<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h> + $<$<BOOL:${ENABLE_SDL2}>:sdl2_sink.cpp sdl2_sink.h> ) create_target_directory_groups(audio_core) @@ -71,3 +72,7 @@ if(ENABLE_CUBEB) target_link_libraries(audio_core PRIVATE cubeb) target_compile_definitions(audio_core PRIVATE -DHAVE_CUBEB=1) endif() +if(ENABLE_SDL2) + target_link_libraries(audio_core PRIVATE SDL2) + target_compile_definitions(audio_core PRIVATE HAVE_SDL2) +endif() diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp new file mode 100644 index 000000000..62d3716a6 --- /dev/null +++ b/src/audio_core/sdl2_sink.cpp @@ -0,0 +1,163 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <atomic> +#include <cstring> +#include "audio_core/sdl2_sink.h" +#include "audio_core/stream.h" +#include "audio_core/time_stretch.h" +#include "common/assert.h" +#include "common/logging/log.h" +//#include "common/settings.h" + +// Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307 +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" +#endif +#include <SDL.h> +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +namespace AudioCore { + +class SDLSinkStream final : public SinkStream { +public: + SDLSinkStream(u32 sample_rate, u32 num_channels_, const std::string& output_device) + : num_channels{std::min(num_channels_, 6u)}, time_stretch{sample_rate, num_channels} { + + SDL_AudioSpec spec; + spec.freq = sample_rate; + spec.channels = static_cast<u8>(num_channels); + spec.format = AUDIO_S16SYS; + spec.samples = 4096; + spec.callback = nullptr; + + SDL_AudioSpec obtained; + if (output_device.empty()) { + dev = SDL_OpenAudioDevice(nullptr, 0, &spec, &obtained, 0); + } else { + dev = SDL_OpenAudioDevice(output_device.c_str(), 0, &spec, &obtained, 0); + } + + if (dev == 0) { + LOG_CRITICAL(Audio_Sink, "Error opening sdl audio device: {}", SDL_GetError()); + return; + } + + SDL_PauseAudioDevice(dev, 0); + } + + ~SDLSinkStream() override { + if (dev == 0) { + return; + } + + SDL_CloseAudioDevice(dev); + } + + void EnqueueSamples(u32 source_num_channels, const std::vector<s16>& samples) override { + if (source_num_channels > num_channels) { + // Downsample 6 channels to 2 + ASSERT_MSG(source_num_channels == 6, "Channel count must be 6"); + + std::vector<s16> buf; + buf.reserve(samples.size() * num_channels / source_num_channels); + for (std::size_t i = 0; i < samples.size(); i += source_num_channels) { + // Downmixing implementation taken from the ATSC standard + const s16 left{samples[i + 0]}; + const s16 right{samples[i + 1]}; + const s16 
+ center{samples[i + 2]}; + const s16 surround_left{samples[i + 4]}; + const s16 surround_right{samples[i + 5]}; + // Not used in the ATSC reference implementation + [[maybe_unused]] const s16 low_frequency_effects{samples[i + 3]}; + + constexpr s32 clev{707}; // center mixing level coefficient + constexpr s32 slev{707}; // surround mixing level coefficient + + // The mix is evaluated in s32 and can exceed the s16 range. Saturate instead of + // letting the narrowing cast wrap around, which would be audible as distortion. + const s32 mixed_left{left + (clev * center / 1000) + (slev * surround_left / 1000)}; + const s32 mixed_right{right + (clev * center / 1000) + + (slev * surround_right / 1000)}; + buf.push_back(static_cast<s16>(std::clamp<s32>(mixed_left, -32768, 32767))); + buf.push_back(static_cast<s16>(std::clamp<s32>(mixed_right, -32768, 32767))); + } + int ret = SDL_QueueAudio(dev, static_cast<const void*>(buf.data()), + static_cast<u32>(buf.size() * sizeof(s16))); + if (ret < 0) + LOG_WARNING(Audio_Sink, "Could not queue audio buffer: {}", SDL_GetError()); + return; + } + + int ret = SDL_QueueAudio(dev, static_cast<const void*>(samples.data()), + static_cast<u32>(samples.size() * sizeof(s16))); + if (ret < 0) + LOG_WARNING(Audio_Sink, "Could not queue audio buffer: {}", SDL_GetError()); + } + + std::size_t SamplesInQueue(u32 channel_count) const override { + if (dev == 0) + return 0; + + return SDL_GetQueuedAudioSize(dev) / (channel_count * sizeof(s16)); + } + + void Flush() override { + should_flush = true; + } + + u32 GetNumChannels() const { + return num_channels; + } + +private: + SDL_AudioDeviceID dev = 0; + u32 num_channels{}; + std::atomic<bool> should_flush{}; + TimeStretcher time_stretch; +}; + +SDLSink::SDLSink(std::string_view target_device_name) { + if (!SDL_WasInit(SDL_INIT_AUDIO)) { + if (SDL_InitSubSystem(SDL_INIT_AUDIO) < 0) { + LOG_CRITICAL(Audio_Sink, "SDL_InitSubSystem audio failed: {}", SDL_GetError()); + return; + } + } + + if (target_device_name != auto_device_name && !target_device_name.empty()) { + output_device = target_device_name; + } else { + output_device.clear(); + } +} + +SDLSink::~SDLSink() = default; + +SinkStream& SDLSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, const std::string&) { + sink_streams.push_back( 
+ std::make_unique<SDLSinkStream>(sample_rate, num_channels, output_device)); + return *sink_streams.back(); +} + +std::vector<std::string> ListSDLSinkDevices() { + std::vector<std::string> device_list; + + if (!SDL_WasInit(SDL_INIT_AUDIO)) { + if (SDL_InitSubSystem(SDL_INIT_AUDIO) < 0) { + LOG_CRITICAL(Audio_Sink, "SDL_InitSubSystem audio failed: {}", SDL_GetError()); + return {}; + } + } + + const int device_count = SDL_GetNumAudioDevices(0); + for (int i = 0; i < device_count; ++i) { + device_list.emplace_back(SDL_GetAudioDeviceName(i, 0)); + } + + return device_list; +} + +} // namespace AudioCore diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h new file mode 100644 index 000000000..8ec1526d8 --- /dev/null +++ b/src/audio_core/sdl2_sink.h @@ -0,0 +1,29 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> +#include <vector> + +#include "audio_core/sink.h" + +namespace AudioCore { + +class SDLSink final : public Sink { +public: + explicit SDLSink(std::string_view device_id); + ~SDLSink() override; + + SinkStream& AcquireSinkStream(u32 sample_rate, u32 num_channels, + const std::string& name) override; + +private: + std::string output_device; + std::vector<SinkStreamPtr> sink_streams; +}; + +std::vector<std::string> ListSDLSinkDevices(); + +} // namespace AudioCore diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp index a848eb1c9..de10aecd2 100644 --- a/src/audio_core/sink_details.cpp +++ b/src/audio_core/sink_details.cpp @@ -11,6 +11,9 @@ #ifdef HAVE_CUBEB #include "audio_core/cubeb_sink.h" #endif +#ifdef HAVE_SDL2 +#include "audio_core/sdl2_sink.h" +#endif #include "common/logging/log.h" namespace AudioCore { @@ -36,6 +39,13 @@ constexpr SinkDetails sink_details[] = { }, &ListCubebSinkDevices}, #endif +#ifdef HAVE_SDL2 + SinkDetails{"sdl2", + [](std::string_view device_id) -> std::unique_ptr<Sink> 
{ + return std::make_unique<SDLSink>(device_id); + }, + &ListSDLSinkDevices}, +#endif SinkDetails{"null", [](std::string_view device_id) -> std::unique_ptr<Sink> { return std::make_unique<NullSink>(device_id); diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp index c84f31f3e..710e88b39 100644 --- a/src/common/fs/file.cpp +++ b/src/common/fs/file.cpp @@ -183,10 +183,6 @@ size_t WriteStringToFile(const std::filesystem::path& path, FileType type, size_t AppendStringToFile(const std::filesystem::path& path, FileType type, std::string_view string) { - if (!Exists(path)) { - return WriteStringToFile(path, type, string); - } - if (!IsFile(path)) { return 0; } diff --git a/src/common/fs/file.h b/src/common/fs/file.h index 50e270c5b..0f10b6003 100644 --- a/src/common/fs/file.h +++ b/src/common/fs/file.h @@ -71,7 +71,7 @@ template <typename Path> /** * Writes a string to a file at path and returns the number of characters successfully written. - * If an file already exists at path, its contents will be erased. + * If a file already exists at path, its contents will be erased. * If the filesystem object at path is not a file, this function returns 0. * * @param path Filesystem path @@ -95,7 +95,6 @@ template <typename Path> /** * Appends a string to a file at path and returns the number of characters successfully written. - * If a file does not exist at path, WriteStringToFile is called instead. * If the filesystem object at path is not a file, this function returns 0. 
* * @param path Filesystem path diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp index 8bd70abc7..2a5a7596c 100644 --- a/src/common/host_memory.cpp +++ b/src/common/host_memory.cpp @@ -34,7 +34,7 @@ constexpr size_t HugePageSize = 0x200000; // Manually imported for MinGW compatibility #ifndef MEM_RESERVE_PLACEHOLDER -#define MEM_RESERVE_PLACEHOLDER 0x0004000 +#define MEM_RESERVE_PLACEHOLDER 0x00040000 #endif #ifndef MEM_REPLACE_PLACEHOLDER #define MEM_REPLACE_PLACEHOLDER 0x00004000 diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 360e878d6..6397308ec 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -55,6 +55,7 @@ void LogSettings() { log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); + log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); @@ -121,6 +122,7 @@ void RestoreGlobalState(bool is_powered_on) { values.cpu_accuracy.SetGlobal(true); values.cpuopt_unsafe_unfuse_fma.SetGlobal(true); values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true); + values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true); values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); values.cpuopt_unsafe_fastmem_check.SetGlobal(true); @@ -135,6 +137,7 @@ void RestoreGlobalState(bool is_powered_on) { values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); values.use_nvdec_emulation.SetGlobal(true); + values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); values.use_assembly_shaders.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); diff --git a/src/common/settings.h 
b/src/common/settings.h index 1af8c5ac2..85554eac4 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -129,6 +129,7 @@ struct Values { Setting<bool> cpuopt_unsafe_unfuse_fma; Setting<bool> cpuopt_unsafe_reduce_fp_error; + Setting<bool> cpuopt_unsafe_ignore_standard_fpcr; Setting<bool> cpuopt_unsafe_inaccurate_nan; Setting<bool> cpuopt_unsafe_fastmem_check; @@ -147,7 +148,9 @@ struct Values { Setting<GPUAccuracy> gpu_accuracy; Setting<bool> use_asynchronous_gpu_emulation; Setting<bool> use_nvdec_emulation; + Setting<bool> accelerate_astc; Setting<bool> use_vsync; + Setting<bool> disable_fps_limit; Setting<bool> use_assembly_shaders; Setting<bool> use_asynchronous_shaders; Setting<bool> use_fast_gpu_time; @@ -218,6 +221,7 @@ struct Values { std::string program_args; bool dump_exefs; bool dump_nso; + bool enable_fs_access_log; bool reporting_services; bool quest_flag; bool disable_macro_jit; diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index c8f6dc765..f871f7bf4 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -186,6 +186,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* if (Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP; } + if (Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue()) { + config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue; + } if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; } diff --git a/src/core/core.cpp b/src/core/core.cpp index c5004b7b4..e6f1aa0e7 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include <array> +#include <atomic> #include <memory> #include <utility> @@ -377,7 +378,7 @@ struct System::Impl { std::unique_ptr<Core::DeviceMemory> device_memory; Core::Memory::Memory memory; CpuManager cpu_manager; - bool is_powered_on = false; + std::atomic_bool is_powered_on{}; bool exit_lock = false; Reporter reporter; @@ -463,7 +464,7 @@ System::ResultStatus System::Load(Frontend::EmuWindow& emu_window, const std::st } bool System::IsPoweredOn() const { - return impl->is_powered_on; + return impl->is_powered_on.load(std::memory_order::relaxed); } void System::PrepareReschedule() { diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp index da88f35bc..0c4bba66b 100644 --- a/src/core/hle/kernel/k_resource_limit.cpp +++ b/src/core/hle/kernel/k_resource_limit.cpp @@ -79,6 +79,7 @@ ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) { R_UNLESS(current_values[index] <= value, ResultInvalidState); limit_values[index] = value; + peak_values[index] = current_values[index]; return ResultSuccess; } diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 3af9881c2..db4d44c12 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "common/hex_util.h" #include "common/logging/log.h" +#include "common/settings.h" #include "common/string_util.h" #include "core/core.h" #include "core/file_sys/directory.h" @@ -785,6 +786,10 @@ FSP_SRV::FSP_SRV(Core::System& system_) }; // clang-format on RegisterHandlers(functions); + + if (Settings::values.enable_fs_access_log) { + access_log_mode = AccessLogMode::SdCard; + } } FSP_SRV::~FSP_SRV() = default; @@ -1041,9 +1046,9 @@ void FSP_SRV::DisableAutoSaveDataCreation(Kernel::HLERequestContext& ctx) { void FSP_SRV::SetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; - log_mode 
= rp.PopEnum<LogMode>(); + access_log_mode = rp.PopEnum<AccessLogMode>(); - LOG_DEBUG(Service_FS, "called, log_mode={:08X}", log_mode); + LOG_DEBUG(Service_FS, "called, access_log_mode={}", access_log_mode); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); @@ -1054,7 +1059,7 @@ void FSP_SRV::GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); - rb.PushEnum(log_mode); + rb.PushEnum(access_log_mode); } void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) { @@ -1062,9 +1067,9 @@ void FSP_SRV::OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx) { auto log = Common::StringFromFixedZeroTerminatedBuffer( reinterpret_cast<const char*>(raw.data()), raw.size()); - LOG_DEBUG(Service_FS, "called, log='{}'", log); + LOG_DEBUG(Service_FS, "called"); - reporter.SaveFilesystemAccessReport(log_mode, std::move(log)); + reporter.SaveFSAccessLog(log); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h index ff7455a20..556708284 100644 --- a/src/core/hle/service/filesystem/fsp_srv.h +++ b/src/core/hle/service/filesystem/fsp_srv.h @@ -24,11 +24,10 @@ enum class AccessLogVersion : u32 { Latest = V7_0_0, }; -enum class LogMode : u32 { - Off, +enum class AccessLogMode : u32 { + None, Log, - RedirectToSdCard, - LogToSdCard = Log | RedirectToSdCard, + SdCard, }; class FSP_SRV final : public ServiceFramework<FSP_SRV> { @@ -59,13 +58,12 @@ private: FileSystemController& fsc; const FileSys::ContentProvider& content_provider; + const Core::Reporter& reporter; FileSys::VirtualFile romfs; u64 current_process_id = 0; u32 access_log_program_index = 0; - LogMode log_mode = LogMode::LogToSdCard; - - const Core::Reporter& reporter; + AccessLogMode access_log_mode = AccessLogMode::None; }; } // namespace Service::FileSystem diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp 
b/src/core/hle/service/nvflinger/nvflinger.cpp index d1dbc659b..1d810562f 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -307,6 +307,9 @@ void NVFlinger::Compose() { } s64 NVFlinger::GetNextTicks() const { + if (Settings::values.disable_fps_limit.GetValue()) { + return 0; + } constexpr s64 max_hertz = 120LL; return (1000000000 * (1LL << swap_interval)) / max_hertz; } diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp index bf4402308..c634b6abd 100644 --- a/src/core/hle/service/time/time_zone_content_manager.cpp +++ b/src/core/hle/service/time/time_zone_content_manager.cpp @@ -125,7 +125,7 @@ ResultCode TimeZoneContentManager::GetTimeZoneInfoFile(const std::string& locati return ERROR_TIME_NOT_FOUND; } - vfs_file = zoneinfo_dir->GetFile(location_name); + vfs_file = zoneinfo_dir->GetFileRelative(location_name); if (!vfs_file) { LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"! 
Using default timezone.", time_zone_binary_titleid, location_name); diff --git a/src/core/reporter.cpp b/src/core/reporter.cpp index ec2a16e62..82b0f535a 100644 --- a/src/core/reporter.cpp +++ b/src/core/reporter.cpp @@ -195,7 +195,9 @@ json GetHLERequestContextData(Kernel::HLERequestContext& ctx, Core::Memory::Memo namespace Core { -Reporter::Reporter(System& system_) : system(system_) {} +Reporter::Reporter(System& system_) : system(system_) { + ClearFSAccessLog(); +} Reporter::~Reporter() = default; @@ -362,22 +364,12 @@ void Reporter::SaveErrorReport(u64 title_id, ResultCode result, SaveToFile(std::move(out), GetPath("error_report", title_id, timestamp)); } -void Reporter::SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, - std::string log_message) const { - if (!IsReportingEnabled()) - return; - - const auto timestamp = GetTimestamp(); - const auto title_id = system.CurrentProcess()->GetTitleID(); - json out; +void Reporter::SaveFSAccessLog(std::string_view log_message) const { + const auto access_log_path = + Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "FsAccessLog.txt"; - out["yuzu_version"] = GetYuzuVersionData(); - out["report_common"] = GetReportCommonData(title_id, ResultSuccess, timestamp); - - out["log_mode"] = fmt::format("{:08X}", static_cast<u32>(log_mode)); - out["log_message"] = std::move(log_message); - - SaveToFile(std::move(out), GetPath("filesystem_access_report", title_id, timestamp)); + void(Common::FS::AppendStringToFile(access_log_path, Common::FS::FileType::TextFile, + log_message)); } void Reporter::SaveUserReport() const { @@ -392,6 +384,18 @@ void Reporter::SaveUserReport() const { GetPath("user_report", title_id, timestamp)); } +void Reporter::ClearFSAccessLog() const { + const auto access_log_path = + Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "FsAccessLog.txt"; + + Common::FS::IOFile access_log_file{access_log_path, Common::FS::FileAccessMode::Write, + Common::FS::FileType::TextFile}; + + 
if (!access_log_file.IsOpen()) { + LOG_ERROR(Common_Filesystem, "Failed to clear the filesystem access log."); + } +} + bool Reporter::IsReportingEnabled() const { return Settings::values.reporting_services; } diff --git a/src/core/reporter.h b/src/core/reporter.h index 6fb6ebffa..6e9edeea3 100644 --- a/src/core/reporter.h +++ b/src/core/reporter.h @@ -16,10 +16,6 @@ namespace Kernel { class HLERequestContext; } // namespace Kernel -namespace Service::FileSystem { -enum class LogMode : u32; -} - namespace Service::LM { struct LogMessage; } // namespace Service::LM @@ -69,14 +65,15 @@ public: std::optional<std::string> custom_text_main = {}, std::optional<std::string> custom_text_detail = {}) const; - void SaveFilesystemAccessReport(Service::FileSystem::LogMode log_mode, - std::string log_message) const; + void SaveFSAccessLog(std::string_view log_message) const; // Can be used anywhere to generate a backtrace and general info report at any point during // execution. Not intended to be used for anything other than debugging or testing. 
void SaveUserReport() const; private: + void ClearFSAccessLog() const; + bool IsReportingEnabled() const; System& system; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index ad1a9ffb4..d4c23ced2 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -230,6 +230,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, Settings::values.use_asynchronous_gpu_emulation.GetValue()); AddField(field_type, "Renderer_UseNvdecEmulation", Settings::values.use_nvdec_emulation.GetValue()); + AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 47190c464..f9454bbaa 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -237,6 +237,7 @@ add_library(video_core STATIC texture_cache/util.cpp texture_cache/util.h textures/astc.h + textures/astc.cpp textures/decoders.cpp textures/decoders.h textures/texture.cpp diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 703e34587..eaba1b103 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -763,7 +763,7 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode) { case 1: { READ_UINT_VALUES(2) uint L0 = (v[0] >> 2) | (v[1] & 0xC0); - uint L1 = max(L0 + (v[1] & 0x3F), 0xFFU); + uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); ep1 = uvec4(0xFF, L0, L0, L0); ep2 = uvec4(0xFF, L1, L1, L1); break; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ffe9edc1b..9b4038615 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ 
b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -9,6 +9,8 @@ #include <glad/glad.h> +#include "common/settings.h" + #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state_tracker.h" @@ -307,7 +309,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info) { - return !runtime.HasNativeASTC() && IsPixelFormatASTC(info.format); + if (IsPixelFormatASTC(info.format)) { + return !runtime.HasNativeASTC() && Settings::values.accelerate_astc.GetValue(); + } // Disable other accelerated uploads for now as they don't implement swizzled uploads return false; switch (info.type) { diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index db78ce3d9..6852c11b0 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -2,8 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <atomic> -#include <chrono> +#include <thread> #include "common/settings.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" @@ -12,8 +11,6 @@ namespace Vulkan { -using namespace std::chrono_literals; - MasterSemaphore::MasterSemaphore(const Device& device) { static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, @@ -34,9 +31,9 @@ MasterSemaphore::MasterSemaphore(const Device& device) { // Validation layers have a bug where they fail to track resource usage when using timeline // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have // a separate thread waiting for each timeline semaphore value. 
- debug_thread = std::thread([this] { + debug_thread = std::jthread([this](std::stop_token stop_token) { u64 counter = 0; - while (!shutdown) { + while (!stop_token.stop_requested()) { if (semaphore.Wait(counter, 10'000'000)) { ++counter; } @@ -44,13 +41,6 @@ MasterSemaphore::MasterSemaphore(const Device& device) { }); } -MasterSemaphore::~MasterSemaphore() { - shutdown = true; - - // This thread might not be started - if (debug_thread.joinable()) { - debug_thread.join(); - } -} +MasterSemaphore::~MasterSemaphore() = default; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 4b6d64daa..ee3cd35d0 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -65,11 +65,10 @@ public: } private: - vk::Semaphore semaphore; ///< Timeline semaphore. - std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. - std::atomic<u64> current_tick{1}; ///< Current logical tick. - std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed. - std::thread debug_thread; ///< Debug thread to workaround validation layer bugs. + vk::Semaphore semaphore; ///< Timeline semaphore. + std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. + std::atomic<u64> current_tick{1}; ///< Current logical tick. + std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs. 
}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index bdd0ce8bc..52860b4cf 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -8,6 +8,7 @@ #include <vector> #include "common/bit_cast.h" +#include "common/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/renderer_vulkan/blit_image.h" @@ -828,7 +829,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); } if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { - flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; + if (Settings::values.accelerate_astc.GetValue()) { + flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; + } else { + flags |= VideoCommon::ImageFlagBits::Converted; + } } if (runtime.device.HasDebuggingToolAttached()) { if (image) { diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 906604a39..6835fd747 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -47,6 +47,7 @@ #include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/util.h" +#include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" namespace VideoCommon { @@ -884,8 +885,16 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 ASSERT(copy.image_extent == mip_size); ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); - DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, - output.subspan(output_offset)); + if (IsPixelFormatASTC(info.format)) { + 
+ ASSERT(copy.image_extent.depth == 1); + Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), + copy.image_extent.width, copy.image_extent.height, + copy.image_subresource.num_layers, tile_size.width, + tile_size.height, output.subspan(output_offset)); + } else { + DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, + output.subspan(output_offset)); + } copy.buffer_offset = output_offset; copy.buffer_row_length = mip_size.width; copy.buffer_image_height = mip_size.height; @@ -1087,7 +1096,15 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const return std::nullopt; } const ImageInfo& existing = image.info; - if (False(options & RelaxedOptions::Format)) { + if (True(options & RelaxedOptions::Format)) { + // Format checking is relaxed, but we still have to check for matching bytes per block. + // This avoids creating a view for blits on UE4 titles where formats with different bytes + // per block are aliased. + if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format)) { + return std::nullopt; + } + } else { + // Format compatibility is not relaxed, ensure we are creating a view on a compatible format if (!IsViewCompatible(existing.format, candidate.format, broken_views, native_bgr)) { return std::nullopt; } diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp new file mode 100644 index 000000000..9b2177ebd --- /dev/null +++ b/src/video_core/textures/astc.cpp @@ -0,0 +1,1577 @@ +// Copyright 2016 The University of North Carolina at Chapel Hill +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Please send all BUG REPORTS to <pavel@cs.unc.edu>. +// <http://gamma.cs.unc.edu/FasTC/> + +#include <algorithm> +#include <cassert> +#include <cstring> +#include <span> +#include <vector> + +#include <boost/container/static_vector.hpp> + +#include "common/common_types.h" +#include "video_core/textures/astc.h" + +class InputBitStream { +public: + constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0) + : cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {} + + constexpr size_t GetBitsRead() const { + return bits_read; + } + + constexpr bool ReadBit() { + if (bits_read >= total_bits * 8) { + return 0; + } + const bool bit = ((*cur_byte >> next_bit) & 1) != 0; + ++next_bit; + while (next_bit >= 8) { + next_bit -= 8; + ++cur_byte; + } + ++bits_read; + return bit; + } + + constexpr u32 ReadBits(std::size_t nBits) { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { + ret |= (ReadBit() & 1) << i; + } + return ret; + } + + template <std::size_t nBits> + constexpr u32 ReadBits() { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { + ret |= (ReadBit() & 1) << i; + } + return ret; + } + +private: + const u8* cur_byte; + size_t total_bits = 0; + size_t next_bit = 0; + size_t bits_read = 0; +}; + +class OutputBitStream { +public: + constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) + : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} + + constexpr std::size_t GetBitsWritten() const { + return bits_written; + } + + constexpr 
void WriteBitsR(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { + WriteBit((val >> (nBits - i - 1)) & 1); + } + } + + constexpr void WriteBits(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { + WriteBit((val >> i) & 1); + } + } + +private: + constexpr void WriteBit(bool b) { + if (bits_written >= num_bits) { + return; + } + + const u32 mask = 1 << next_bit++; + + // clear the bit + *cur_byte &= static_cast<u8>(~mask); + + // Write the bit, if necessary + if (b) + *cur_byte |= static_cast<u8>(mask); + + // Next byte? + if (next_bit >= 8) { + cur_byte += 1; + next_bit = 0; + } + } + + u8* cur_byte; + std::size_t num_bits; + std::size_t bits_written = 0; + std::size_t next_bit = 0; +}; + +template <typename IntType> +class Bits { +public: + explicit Bits(const IntType& v) : m_Bits(v) {} + + Bits(const Bits&) = delete; + Bits& operator=(const Bits&) = delete; + + u8 operator[](u32 bitPos) const { + return static_cast<u8>((m_Bits >> bitPos) & 1); + } + + IntType operator()(u32 start, u32 end) const { + if (start == end) { + return (*this)[start]; + } else if (start > end) { + u32 t = start; + start = end; + end = t; + } + + u64 mask = (1 << (end - start + 1)) - 1; + return (m_Bits >> start) & static_cast<IntType>(mask); + } + +private: + const IntType& m_Bits; +}; + +namespace Tegra::Texture::ASTC { +using IntegerEncodedVector = boost::container::static_vector< + IntegerEncodedValue, 256, + boost::container::static_vector_options< + boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, + boost::container::throw_on_overflow<false>>::type>; + +static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + std::array<u32, 5> m; + std::array<u32, 5> t; + u32 T; + + // Read the trit encoded block according to + // table C.2.14 + m[0] = bits.ReadBits(nBitsPerValue); + T = bits.ReadBits<2>(); + m[1] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 
2; + m[2] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 4; + m[3] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 5; + m[4] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 7; + + u32 C = 0; + + Bits<u32> Tb(T); + if (Tb(2, 4) == 7) { + C = (Tb(5, 7) << 2) | Tb(0, 1); + t[4] = t[3] = 2; + } else { + C = Tb(0, 4); + if (Tb(5, 6) == 3) { + t[4] = 2; + t[3] = Tb[7]; + } else { + t[4] = Tb[7]; + t[3] = Tb(5, 6); + } + } + + Bits<u32> Cb(C); + if (Cb(0, 1) == 3) { + t[2] = 2; + t[1] = Cb[4]; + t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); + } else if (Cb(2, 3) == 3) { + t[2] = 2; + t[1] = 2; + t[0] = Cb(0, 1); + } else { + t[2] = Cb[4]; + t[1] = Cb(2, 3); + t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); + } + + for (std::size_t i = 0; i < 5; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); + val.bit_value = m[i]; + val.trit_value = t[i]; + } +} + +static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result, + u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + u32 m[3]; + u32 q[3]; + u32 Q; + + // Read the trit encoded block according to + // table C.2.15 + m[0] = bits.ReadBits(nBitsPerValue); + Q = bits.ReadBits<3>(); + m[1] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 3; + m[2] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 5; + + Bits<u32> Qb(Q); + if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { + q[0] = q[1] = 4; + q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); + } else { + u32 C = 0; + if (Qb(1, 2) == 3) { + q[2] = 4; + C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; + } else { + q[2] = Qb(5, 6); + C = Qb(0, 4); + } + + Bits<u32> Cb(C); + if (Cb(0, 2) == 5) { + q[1] = 4; + q[0] = Cb(3, 4); + } else { + q[1] = Cb(3, 4); + q[0] = Cb(0, 2); + } + } + + for (std::size_t i = 0; i < 3; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Quint, nBitsPerValue); + val.bit_value = m[i]; + val.quint_value = 
q[i]; + } +} + +// Fills result with the values that are encoded in the given +// bitstream. We must know beforehand what the maximum possible +// value is, and how many values we're decoding. +static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, + u32 nValues) { + // Determine encoding parameters + IntegerEncodedValue val = EncodingsValues[maxRange]; + + // Start decoding + u32 nValsDecoded = 0; + while (nValsDecoded < nValues) { + switch (val.encoding) { + case IntegerEncoding::Quint: + DecodeQuintBlock(bits, result, val.num_bits); + nValsDecoded += 3; + break; + + case IntegerEncoding::Trit: + DecodeTritBlock(bits, result, val.num_bits); + nValsDecoded += 5; + break; + + case IntegerEncoding::JustBits: + val.bit_value = bits.ReadBits(val.num_bits); + result.push_back(val); + nValsDecoded++; + break; + } + } +} + +struct TexelWeightParams { + u32 m_Width = 0; + u32 m_Height = 0; + bool m_bDualPlane = false; + u32 m_MaxWeight = 0; + bool m_bError = false; + bool m_bVoidExtentLDR = false; + bool m_bVoidExtentHDR = false; + + u32 GetPackedBitSize() const { + // How many indices do we have? + u32 nIdxs = m_Height * m_Width; + if (m_bDualPlane) { + nIdxs *= 2; + } + + return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); + } + + u32 GetNumWeightValues() const { + u32 ret = m_Width * m_Height; + if (m_bDualPlane) { + ret *= 2; + } + return ret; + } +}; + +static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { + TexelWeightParams params; + + // Read the entire block mode all at once + u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); + + // Does this match the void extent block mode? + if ((modeBits & 0x01FF) == 0x1FC) { + if (modeBits & 0x200) { + params.m_bVoidExtentHDR = true; + } else { + params.m_bVoidExtentLDR = true; + } + + // Next two bits must be one. 
+ if (!(modeBits & 0x400) || !strm.ReadBit()) { + params.m_bError = true; + } + + return params; + } + + // First check if the last four bits are zero + if ((modeBits & 0xF) == 0) { + params.m_bError = true; + return params; + } + + // If the last two bits are zero, then if bits + // [6-8] are all ones, this is also reserved. + if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) { + params.m_bError = true; + return params; + } + + // Otherwise, there is no error... Figure out the layout + // of the block mode. Layout is determined by a number + // between 0 and 9 corresponding to table C.2.8 of the + // ASTC spec. + u32 layout = 0; + + if ((modeBits & 0x1) || (modeBits & 0x2)) { + // layout is in [0-4] + if (modeBits & 0x8) { + // layout is in [2-4] + if (modeBits & 0x4) { + // layout is in [3-4] + if (modeBits & 0x100) { + layout = 4; + } else { + layout = 3; + } + } else { + layout = 2; + } + } else { + // layout is in [0-1] + if (modeBits & 0x4) { + layout = 1; + } else { + layout = 0; + } + } + } else { + // layout is in [5-9] + if (modeBits & 0x100) { + // layout is in [7-9] + if (modeBits & 0x80) { + // layout is in [7-8] + assert((modeBits & 0x40) == 0U); + if (modeBits & 0x20) { + layout = 8; + } else { + layout = 7; + } + } else { + layout = 9; + } + } else { + // layout is in [5-6] + if (modeBits & 0x80) { + layout = 6; + } else { + layout = 5; + } + } + } + + assert(layout < 10); + + // Determine R + u32 R = !!(modeBits & 0x10); + if (layout < 5) { + R |= (modeBits & 0x3) << 1; + } else { + R |= (modeBits & 0xC) >> 1; + } + assert(2 <= R && R <= 7); + + // Determine width & height + switch (layout) { + case 0: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; + params.m_Width = B + 4; + params.m_Height = A + 2; + break; + } + + case 1: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; + params.m_Width = B + 8; + params.m_Height = A + 2; + break; + } + + case 2: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = 
(modeBits >> 7) & 0x3; + params.m_Width = A + 2; + params.m_Height = B + 8; + break; + } + + case 3: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; + params.m_Width = A + 2; + params.m_Height = B + 6; + break; + } + + case 4: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; + params.m_Width = B + 2; + params.m_Height = A + 2; + break; + } + + case 5: { + u32 A = (modeBits >> 5) & 0x3; + params.m_Width = 12; + params.m_Height = A + 2; + break; + } + + case 6: { + u32 A = (modeBits >> 5) & 0x3; + params.m_Width = A + 2; + params.m_Height = 12; + break; + } + + case 7: { + params.m_Width = 6; + params.m_Height = 10; + break; + } + + case 8: { + params.m_Width = 10; + params.m_Height = 6; + break; + } + + case 9: { + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 9) & 0x3; + params.m_Width = A + 6; + params.m_Height = B + 6; + break; + } + + default: + assert(false && "Don't know this layout..."); + params.m_bError = true; + break; + } + + // Determine whether or not we're using dual planes + // and/or high precision layouts. + bool D = (layout != 9) && (modeBits & 0x400); + bool H = (layout != 9) && (modeBits & 0x200); + + if (H) { + const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; + params.m_MaxWeight = maxWeights[R - 2]; + } else { + const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; + params.m_MaxWeight = maxWeights[R - 2]; + } + + params.m_bDualPlane = D; + + return params; +} + +static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, + u32 blockHeight) { + // Don't actually care about the void extent, just read the bits... 
+ for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); + } + + // Decode the RGBA components and renormalize them to the range [0, 255] + u16 r = static_cast<u16>(strm.ReadBits<16>()); + u16 g = static_cast<u16>(strm.ReadBits<16>()); + u16 b = static_cast<u16>(strm.ReadBits<16>()); + u16 a = static_cast<u16>(strm.ReadBits<16>()); + + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | + (static_cast<u32>(a) & 0xFF00) << 16; + + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = rgba; + } + } +} + +static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { + outBuf[j * blockWidth + i] = 0xFFFF00FF; + } + } +} +static constexpr u32 ReplicateByteTo16(std::size_t value) { + return REPLICATE_BYTE_TO_16_TABLE[value]; +} + +static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); +static constexpr u32 ReplicateBitTo7(std::size_t value) { + return REPLICATE_BIT_TO_7_TABLE[value]; +} + +static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); +static constexpr u32 ReplicateBitTo9(std::size_t value) { + return REPLICATE_BIT_TO_9_TABLE[value]; +} + +static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); +static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); +static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); +static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); +static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); +/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback +/// to the runtime implementation +static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { + switch (num_bits) { + case 1: + return REPLICATE_1_BIT_TO_8_TABLE[value]; + 
case 2: + return REPLICATE_2_BIT_TO_8_TABLE[value]; + case 3: + return REPLICATE_3_BIT_TO_8_TABLE[value]; + case 4: + return REPLICATE_4_BIT_TO_8_TABLE[value]; + case 5: + return REPLICATE_5_BIT_TO_8_TABLE[value]; + case 6: + return REPLICATE_6_BIT_TO_8_TABLE[value]; + case 7: + return REPLICATE_7_BIT_TO_8_TABLE[value]; + case 8: + return REPLICATE_8_BIT_TO_8_TABLE[value]; + default: + return Replicate(value, num_bits, 8); + } +} + +static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); +static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); +static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); +static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); +static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); +static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { + switch (num_bits) { + case 1: + return REPLICATE_1_BIT_TO_6_TABLE[value]; + case 2: + return REPLICATE_2_BIT_TO_6_TABLE[value]; + case 3: + return REPLICATE_3_BIT_TO_6_TABLE[value]; + case 4: + return REPLICATE_4_BIT_TO_6_TABLE[value]; + case 5: + return REPLICATE_5_BIT_TO_6_TABLE[value]; + default: + return Replicate(value, num_bits, 6); + } +} + +class Pixel { +protected: + using ChannelType = s16; + u8 m_BitDepth[4] = {8, 8, 8, 8}; + s16 color[4] = {}; + +public: + Pixel() = default; + Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) + : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, + color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), + static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} + + // Changes the depth of each pixel. This scales the values to + // the appropriate bit depth by either truncating the least + // significant bits when going from larger to smaller bit depth + // or by repeating the most significant bits when going from + // smaller to larger bit depths. 
+ void ChangeBitDepth() { + for (u32 i = 0; i < 4; i++) { + Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); + m_BitDepth[i] = 8; + } + } + + template <typename IntType> + static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { + float denominator = static_cast<float>((1 << bitDepth) - 1); + return static_cast<float>(channel) / denominator; + } + + // Changes the bit depth of a single component. See the comment + // above for how we do this. + static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { + assert(oldDepth <= 8); + + if (oldDepth == 8) { + // Do nothing + return val; + } else if (oldDepth == 0) { + return static_cast<ChannelType>((1 << 8) - 1); + } else if (8 > oldDepth) { + return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); + } else { + // oldDepth > newDepth + const u8 bitsWasted = static_cast<u8>(oldDepth - 8); + u16 v = static_cast<u16>(val); + v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); + v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); + return static_cast<u8>(v); + } + + assert(false && "We shouldn't get here."); + return 0; + } + + const ChannelType& A() const { + return color[0]; + } + ChannelType& A() { + return color[0]; + } + const ChannelType& R() const { + return color[1]; + } + ChannelType& R() { + return color[1]; + } + const ChannelType& G() const { + return color[2]; + } + ChannelType& G() { + return color[2]; + } + const ChannelType& B() const { + return color[3]; + } + ChannelType& B() { + return color[3]; + } + const ChannelType& Component(u32 idx) const { + return color[idx]; + } + ChannelType& Component(u32 idx) { + return color[idx]; + } + + void GetBitDepth(u8 (&outDepth)[4]) const { + for (s32 i = 0; i < 4; i++) { + outDepth[i] = m_BitDepth[i]; + } + } + + // Take all of the components, transform them to their 8-bit variants, + // and then pack each channel into an R8G8B8A8 32-bit integer. 
We assume + // that the architecture is little-endian, so the alpha channel will end + // up in the most-significant byte. + u32 Pack() const { + Pixel eightBit(*this); + eightBit.ChangeBitDepth(); + + u32 r = 0; + r |= eightBit.A(); + r <<= 8; + r |= eightBit.B(); + r <<= 8; + r |= eightBit.G(); + r <<= 8; + r |= eightBit.R(); + return r; + } + + // Clamps the pixel to the range [0,255] + void ClampByte() { + for (u32 i = 0; i < 4; i++) { + color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); + } + } + + void MakeOpaque() { + A() = 255; + } +}; + +static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions, + const u32 nBitsForColorData) { + // First figure out how many color values we have + u32 nValues = 0; + for (u32 i = 0; i < nPartitions; i++) { + nValues += ((modes[i] >> 2) + 1) << 1; + } + + // Then based on the number of values and the remaining number of bits, + // figure out the max value for each of them... + u32 range = 256; + while (--range > 0) { + IntegerEncodedValue val = EncodingsValues[range]; + u32 bitLength = val.GetBitLength(nValues); + if (bitLength <= nBitsForColorData) { + // Find the smallest possible range that matches the given encoding + while (--range > 0) { + IntegerEncodedValue newval = EncodingsValues[range]; + if (!newval.MatchesEncoding(val)) { + break; + } + } + + // Return to last matching range. + range++; + break; + } + } + + // We now have enough to decode our integer sequence. + IntegerEncodedVector decodedColorValues; + + InputBitStream colorStream(data, 0); + DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); + + // Once we have the decoded values, we need to dequantize them to the 0-255 range + // This procedure is outlined in ASTC spec C.2.13 + u32 outIdx = 0; + for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { + // Have we already decoded all that we need? 
+ if (outIdx >= nValues) { + break; + } + + const IntegerEncodedValue& val = *itr; + u32 bitlen = val.num_bits; + u32 bitval = val.bit_value; + + assert(bitlen >= 1); + + u32 A = 0, B = 0, C = 0, D = 0; + // A is just the lsb replicated 9 times. + A = ReplicateBitTo9(bitval & 1); + + switch (val.encoding) { + // Replicate bits + case IntegerEncoding::JustBits: + out[outIdx++] = FastReplicateTo8(bitval, bitlen); + break; + + // Use algorithm in C.2.13 + case IntegerEncoding::Trit: { + + D = val.trit_value; + + switch (bitlen) { + case 1: { + C = 204; + } break; + + case 2: { + C = 93; + // B = b000b0bb0 + u32 b = (bitval >> 1) & 1; + B = (b << 8) | (b << 4) | (b << 2) | (b << 1); + } break; + + case 3: { + C = 44; + // B = cb000cbcb + u32 cb = (bitval >> 1) & 3; + B = (cb << 7) | (cb << 2) | cb; + } break; + + case 4: { + C = 22; + // B = dcb000dcb + u32 dcb = (bitval >> 1) & 7; + B = (dcb << 6) | dcb; + } break; + + case 5: { + C = 11; + // B = edcb000ed + u32 edcb = (bitval >> 1) & 0xF; + B = (edcb << 5) | (edcb >> 2); + } break; + + case 6: { + C = 5; + // B = fedcb000f + u32 fedcb = (bitval >> 1) & 0x1F; + B = (fedcb << 4) | (fedcb >> 4); + } break; + + default: + assert(false && "Unsupported trit encoding for color values!"); + break; + } // switch(bitlen) + } // case IntegerEncoding::Trit + break; + + case IntegerEncoding::Quint: { + + D = val.quint_value; + + switch (bitlen) { + case 1: { + C = 113; + } break; + + case 2: { + C = 54; + // B = b0000bb00 + u32 b = (bitval >> 1) & 1; + B = (b << 8) | (b << 3) | (b << 2); + } break; + + case 3: { + C = 26; + // B = cb0000cbc + u32 cb = (bitval >> 1) & 3; + B = (cb << 7) | (cb << 1) | (cb >> 1); + } break; + + case 4: { + C = 13; + // B = dcb0000dc + u32 dcb = (bitval >> 1) & 7; + B = (dcb << 6) | (dcb >> 1); + } break; + + case 5: { + C = 6; + // B = edcb0000e + u32 edcb = (bitval >> 1) & 0xF; + B = (edcb << 5) | (edcb >> 3); + } break; + + default: + assert(false && "Unsupported quint encoding for color 
values!"); + break; + } // switch(bitlen) + } // case IntegerEncoding::Quint + break; + } // switch(val.encoding) + + if (val.encoding != IntegerEncoding::JustBits) { + u32 T = D * C + B; + T ^= A; + T = (A & 0x80) | (T >> 2); + out[outIdx++] = T; + } + } + + // Make sure that each of our values is in the proper range... + for (u32 i = 0; i < nValues; i++) { + assert(out[i] <= 255); + } +} + +static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { + u32 bitval = val.bit_value; + u32 bitlen = val.num_bits; + + u32 A = ReplicateBitTo7(bitval & 1); + u32 B = 0, C = 0, D = 0; + + u32 result = 0; + switch (val.encoding) { + case IntegerEncoding::JustBits: + result = FastReplicateTo6(bitval, bitlen); + break; + + case IntegerEncoding::Trit: { + D = val.trit_value; + assert(D < 3); + + switch (bitlen) { + case 0: { + u32 results[3] = {0, 32, 63}; + result = results[D]; + } break; + + case 1: { + C = 50; + } break; + + case 2: { + C = 23; + u32 b = (bitval >> 1) & 1; + B = (b << 6) | (b << 2) | b; + } break; + + case 3: { + C = 11; + u32 cb = (bitval >> 1) & 3; + B = (cb << 5) | cb; + } break; + + default: + assert(false && "Invalid trit encoding for texel weight"); + break; + } + } break; + + case IntegerEncoding::Quint: { + D = val.quint_value; + assert(D < 5); + + switch (bitlen) { + case 0: { + u32 results[5] = {0, 16, 32, 47, 63}; + result = results[D]; + } break; + + case 1: { + C = 28; + } break; + + case 2: { + C = 13; + u32 b = (bitval >> 1) & 1; + B = (b << 6) | (b << 1); + } break; + + default: + assert(false && "Invalid quint encoding for texel weight"); + break; + } + } break; + } + + if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { + // Decode the value... 
+ result = D * C + B; + result ^= A; + result = (A & 0x20) | (result >> 2); + } + + assert(result < 64); + + // Change from [0,63] to [0,64] + if (result > 32) { + result += 1; + } + + return result; +} + +static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, + const TexelWeightParams& params, const u32 blockWidth, + const u32 blockHeight) { + u32 weightIdx = 0; + u32 unquantized[2][144]; + + for (auto itr = weights.begin(); itr != weights.end(); ++itr) { + unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); + + if (params.m_bDualPlane) { + ++itr; + unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr); + if (itr == weights.end()) { + break; + } + } + + if (++weightIdx >= (params.m_Width * params.m_Height)) + break; + } + + // Do infill if necessary (Section C.2.18) ... + u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); + u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); + + const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; + for (u32 plane = 0; plane < kPlaneScale; plane++) + for (u32 t = 0; t < blockHeight; t++) + for (u32 s = 0; s < blockWidth; s++) { + u32 cs = Ds * s; + u32 ct = Dt * t; + + u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; + u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; + + u32 js = gs >> 4; + u32 fs = gs & 0xF; + + u32 jt = gt >> 4; + u32 ft = gt & 0x0F; + + u32 w11 = (fs * ft + 8) >> 4; + u32 w10 = ft - w11; + u32 w01 = fs - w11; + u32 w00 = 16 - fs - ft + w11; + + u32 v0 = js + jt * params.m_Width; + +#define FIND_TEXEL(tidx, bidx) \ + u32 p##bidx = 0; \ + do { \ + if ((tidx) < (params.m_Width * params.m_Height)) { \ + p##bidx = unquantized[plane][(tidx)]; \ + } \ + } while (0) + + FIND_TEXEL(v0, 00); + FIND_TEXEL(v0 + 1, 01); + FIND_TEXEL(v0 + params.m_Width, 10); + FIND_TEXEL(v0 + params.m_Width + 1, 11); + +#undef FIND_TEXEL + + out[plane][t * blockWidth + s] = + (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4; + } +} + +// Transfers a bit as described in C.2.14 
+static inline void BitTransferSigned(int& a, int& b) { + b >>= 1; + b |= a & 0x80; + a >>= 1; + a &= 0x3F; + if (a & 0x20) + a -= 0x40; +} + +// Adds more precision to the blue channel as described +// in C.2.14 +static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { + return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), + static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); +} + +// Partition selection functions as specified in +// C.2.21 +static inline u32 hash52(u32 p) { + p ^= p >> 15; + p -= p << 17; + p += p << 7; + p += p << 4; + p ^= p >> 5; + p += p << 16; + p ^= p >> 7; + p ^= p >> 3; + p ^= p << 6; + p ^= p >> 17; + return p; +} + +static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { + if (1 == partitionCount) + return 0; + + if (smallBlock) { + x <<= 1; + y <<= 1; + z <<= 1; + } + + seed += (partitionCount - 1) * 1024; + + u32 rnum = hash52(static_cast<u32>(seed)); + u8 seed1 = static_cast<u8>(rnum & 0xF); + u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); + u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); + u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); + u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); + u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); + u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); + u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); + u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); + u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); + u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); + u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); + + seed1 = static_cast<u8>(seed1 * seed1); + seed2 = static_cast<u8>(seed2 * seed2); + seed3 = static_cast<u8>(seed3 * seed3); + seed4 = static_cast<u8>(seed4 * seed4); + seed5 = static_cast<u8>(seed5 * seed5); + seed6 = static_cast<u8>(seed6 * seed6); + seed7 = static_cast<u8>(seed7 * seed7); + seed8 = static_cast<u8>(seed8 * seed8); + seed9 = static_cast<u8>(seed9 * seed9); + seed10 = static_cast<u8>(seed10 * seed10); + seed11 
= static_cast<u8>(seed11 * seed11); + seed12 = static_cast<u8>(seed12 * seed12); + + s32 sh1, sh2, sh3; + if (seed & 1) { + sh1 = (seed & 2) ? 4 : 5; + sh2 = (partitionCount == 3) ? 6 : 5; + } else { + sh1 = (partitionCount == 3) ? 6 : 5; + sh2 = (seed & 2) ? 4 : 5; + } + sh3 = (seed & 0x10) ? sh1 : sh2; + + seed1 = static_cast<u8>(seed1 >> sh1); + seed2 = static_cast<u8>(seed2 >> sh2); + seed3 = static_cast<u8>(seed3 >> sh1); + seed4 = static_cast<u8>(seed4 >> sh2); + seed5 = static_cast<u8>(seed5 >> sh1); + seed6 = static_cast<u8>(seed6 >> sh2); + seed7 = static_cast<u8>(seed7 >> sh1); + seed8 = static_cast<u8>(seed8 >> sh2); + seed9 = static_cast<u8>(seed9 >> sh3); + seed10 = static_cast<u8>(seed10 >> sh3); + seed11 = static_cast<u8>(seed11 >> sh3); + seed12 = static_cast<u8>(seed12 >> sh3); + + s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); + s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); + s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); + s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + + a &= 0x3F; + b &= 0x3F; + c &= 0x3F; + d &= 0x3F; + + if (partitionCount < 4) + d = 0; + if (partitionCount < 3) + c = 0; + + if (a >= b && a >= c && a >= d) + return 0; + else if (b >= c && b >= d) + return 1; + else if (c >= d) + return 2; + return 3; +} + +static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { + return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); +} + +// Section C.2.14 +static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, + u32 colorEndpointMode) { +#define READ_UINT_VALUES(N) \ + u32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ + v[i] = *(colorValues++); \ + } + +#define READ_INT_VALUES(N) \ + s32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ + v[i] = static_cast<int>(*(colorValues++)); \ + } + + switch (colorEndpointMode) { + case 0: { + READ_UINT_VALUES(2) + ep1 = Pixel(0xFF, v[0], v[0], v[0]); + ep2 = Pixel(0xFF, v[1], v[1], v[1]); + 
} break; + + case 1: { + READ_UINT_VALUES(2) + u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); + u32 L1 = std::min(L0 + (v[1] & 0x3F), 0xFFU); + ep1 = Pixel(0xFF, L0, L0, L0); + ep2 = Pixel(0xFF, L1, L1, L1); + } break; + + case 4: { + READ_UINT_VALUES(4) + ep1 = Pixel(v[2], v[0], v[0], v[0]); + ep2 = Pixel(v[3], v[1], v[1], v[1]); + } break; + + case 5: { + READ_INT_VALUES(4) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + ep1 = Pixel(v[2], v[0], v[0], v[0]); + ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]); + ep1.ClampByte(); + ep2.ClampByte(); + } break; + + case 6: { + READ_UINT_VALUES(4) + ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); + ep2 = Pixel(0xFF, v[0], v[1], v[2]); + } break; + + case 8: { + READ_UINT_VALUES(6) + if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { + ep1 = Pixel(0xFF, v[0], v[2], v[4]); + ep2 = Pixel(0xFF, v[1], v[3], v[5]); + } else { + ep1 = BlueContract(0xFF, v[1], v[3], v[5]); + ep2 = BlueContract(0xFF, v[0], v[2], v[4]); + } + } break; + + case 9: { + READ_INT_VALUES(6) + BitTransferSigned(v[1], v[0]); + BitTransferSigned(v[3], v[2]); + BitTransferSigned(v[5], v[4]); + if (v[1] + v[3] + v[5] >= 0) { + ep1 = Pixel(0xFF, v[0], v[2], v[4]); + ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); + } else { + ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); + ep2 = BlueContract(0xFF, v[0], v[2], v[4]); + } + ep1.ClampByte(); + ep2.ClampByte(); + } break; + + case 10: { + READ_UINT_VALUES(6) + ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); + ep2 = Pixel(v[5], v[0], v[1], v[2]); + } break; + + case 12: { + READ_UINT_VALUES(8) + if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { + ep1 = Pixel(v[6], v[0], v[2], v[4]); + ep2 = Pixel(v[7], v[1], v[3], v[5]); + } else { + ep1 = BlueContract(v[7], v[1], v[3], v[5]); + ep2 = BlueContract(v[6], v[0], v[2], v[4]); + } + } break; + + case 13: { + READ_INT_VALUES(8) + BitTransferSigned(v[1], v[0]); + 
BitTransferSigned(v[3], v[2]); + BitTransferSigned(v[5], v[4]); + BitTransferSigned(v[7], v[6]); + if (v[1] + v[3] + v[5] >= 0) { + ep1 = Pixel(v[6], v[0], v[2], v[4]); + ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]); + } else { + ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]); + ep2 = BlueContract(v[6], v[0], v[2], v[4]); + } + ep1.ClampByte(); + ep2.ClampByte(); + } break; + + default: + assert(false && "Unsupported color endpoint mode (is it HDR?)"); + break; + } + +#undef READ_UINT_VALUES +#undef READ_INT_VALUES +} + +static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, + const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { + InputBitStream strm(inBuf); + TexelWeightParams weightParams = DecodeBlockInfo(strm); + + // Was there an error? + if (weightParams.m_bError) { + assert(false && "Invalid block mode"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_bVoidExtentLDR) { + FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_bVoidExtentHDR) { + assert(false && "HDR void extent blocks are unsupported!"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_Width > blockWidth) { + assert(false && "Texel weight grid width should be smaller than block width"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + if (weightParams.m_Height > blockHeight) { + assert(false && "Texel weight grid height should be smaller than block height"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + // Read num partitions + u32 nPartitions = strm.ReadBits<2>() + 1; + assert(nPartitions <= 4); + + if (nPartitions == 4 && weightParams.m_bDualPlane) { + assert(false && "Dual plane mode is incompatible with four partition blocks"); + FillError(outBuf, blockWidth, blockHeight); + return; + } + + // Based on the number of partitions, read the color endpoint mode for + // each 
partition. + + // Determine partitions, partition index, and color endpoint modes + s32 planeIdx = -1; + u32 partitionIndex; + u32 colorEndpointMode[4] = {0, 0, 0, 0}; + + // Define color data. + u8 colorEndpointData[16]; + memset(colorEndpointData, 0, sizeof(colorEndpointData)); + OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); + + // Read extra config data... + u32 baseCEM = 0; + if (nPartitions == 1) { + colorEndpointMode[0] = strm.ReadBits<4>(); + partitionIndex = 0; + } else { + partitionIndex = strm.ReadBits<10>(); + baseCEM = strm.ReadBits<6>(); + } + u32 baseMode = (baseCEM & 3); + + // Remaining bits are color endpoint data... + u32 nWeightBits = weightParams.GetPackedBitSize(); + s32 remainingBits = 128 - nWeightBits - static_cast<int>(strm.GetBitsRead()); + + // Consider extra bits prior to texel data... + u32 extraCEMbits = 0; + if (baseMode) { + switch (nPartitions) { + case 2: + extraCEMbits += 2; + break; + case 3: + extraCEMbits += 5; + break; + case 4: + extraCEMbits += 8; + break; + default: + assert(false); + break; + } + } + remainingBits -= extraCEMbits; + + // Do we have a dual plane situation? + u32 planeSelectorBits = 0; + if (weightParams.m_bDualPlane) { + planeSelectorBits = 2; + } + remainingBits -= planeSelectorBits; + + // Read color data... 
+ u32 colorDataBits = remainingBits; + while (remainingBits > 0) { + u32 nb = std::min(remainingBits, 8); + u32 b = strm.ReadBits(nb); + colorEndpointStream.WriteBits(b, nb); + remainingBits -= 8; + } + + // Read the plane selection bits + planeIdx = strm.ReadBits(planeSelectorBits); + + // Read the rest of the CEM + if (baseMode) { + u32 extraCEM = strm.ReadBits(extraCEMbits); + u32 CEM = (extraCEM << 6) | baseCEM; + CEM >>= 2; + + bool C[4] = {0}; + for (u32 i = 0; i < nPartitions; i++) { + C[i] = CEM & 1; + CEM >>= 1; + } + + u8 M[4] = {0}; + for (u32 i = 0; i < nPartitions; i++) { + M[i] = CEM & 3; + CEM >>= 2; + assert(M[i] <= 3); + } + + for (u32 i = 0; i < nPartitions; i++) { + colorEndpointMode[i] = baseMode; + if (!(C[i])) + colorEndpointMode[i] -= 1; + colorEndpointMode[i] <<= 2; + colorEndpointMode[i] |= M[i]; + } + } else if (nPartitions > 1) { + u32 CEM = baseCEM >> 2; + for (u32 i = 0; i < nPartitions; i++) { + colorEndpointMode[i] = CEM; + } + } + + // Make sure everything up till here is sane. + for (u32 i = 0; i < nPartitions; i++) { + assert(colorEndpointMode[i] < 16); + } + assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); + + // Decode both color data and texel weight data + u32 colorValues[32]; // Four values, two endpoints, four maximum paritions + DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, + colorDataBits); + + Pixel endpoints[4][2]; + const u32* colorValuesPtr = colorValues; + for (u32 i = 0; i < nPartitions; i++) { + ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); + } + + // Read the texel weight data.. 
+ std::array<u8, 16> texelWeightData; + std::ranges::copy(inBuf, texelWeightData.begin()); + + // Reverse everything + for (u32 i = 0; i < 8; i++) { +// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits +#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 + u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); + u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); +#undef REVERSE_BYTE + + texelWeightData[i] = b; + texelWeightData[15 - i] = a; + } + + // Make sure that higher non-texel bits are set to zero + const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; + if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) { + texelWeightData[clearByteStart - 1] &= + static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); + std::memset(texelWeightData.data() + clearByteStart, 0, + std::min(16U - clearByteStart, 16U)); + } + + IntegerEncodedVector texelWeightValues; + + InputBitStream weightStream(texelWeightData); + + DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, + weightParams.GetNumWeightValues()); + + // Blocks can be at most 12x12, so we can have as many as 144 weights + u32 weights[2][144]; + UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); + + // Now that we have endpoints and weights, we can interpolate and generate + // the proper decoding... 
+ for (u32 j = 0; j < blockHeight; j++) + for (u32 i = 0; i < blockWidth; i++) { + u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, + (blockHeight * blockWidth) < 32); + assert(partition < nPartitions); + + Pixel p; + for (u32 c = 0; c < 4; c++) { + u32 C0 = endpoints[partition][0].Component(c); + C0 = ReplicateByteTo16(C0); + u32 C1 = endpoints[partition][1].Component(c); + C1 = ReplicateByteTo16(C1); + + u32 plane = 0; + if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { + plane = 1; + } + + u32 weight = weights[plane][j * blockWidth + i]; + u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; + if (C == 65535) { + p.Component(c) = 255; + } else { + double Cf = static_cast<double>(C); + p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); + } + } + + outBuf[j * blockWidth + i] = p.Pack(); + } +} + +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { + u32 block_index = 0; + std::size_t depth_offset = 0; + for (u32 z = 0; z < depth; z++) { + for (u32 y = 0; y < height; y += block_height) { + for (u32 x = 0; x < width; x += block_width) { + const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; + + // Blocks can be at most 12x12 + std::array<u32, 12 * 12> uncompData; + DecompressBlock(blockPtr, block_width, block_height, uncompData); + + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); + + const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); + for (u32 jj = 0; jj < decompHeight; jj++) { + std::memcpy(outRow.data() + jj * width * 4, + uncompData.data() + jj * block_width, decompWidth * 4); + } + ++block_index; + } + } + depth_offset += height * width * 4; + } +} + +} // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 
c1c73fda5..c1c37dfe7 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h @@ -129,4 +129,7 @@ struct AstcBufferData { decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; } constexpr ASTC_BUFFER_DATA; +void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, + uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); + } // namespace Tegra::Texture::ASTC diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index 5c64c9bf7..0f60765bb 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp @@ -12,6 +12,14 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, [[maybe_unused]] void* user_data) { + // Skip logging known false-positive validation errors + switch (static_cast<u32>(data->messageIdNumber)) { + case 0x682a878au: // VUID-vkCmdBindVertexBuffers2EXT-pBuffers-parameter + case 0x99fb7dfdu: // UNASSIGNED-RequiredParameter (vkCmdBindVertexBuffers2EXT pBuffers[0]) + return VK_FALSE; + default: + break; + } const std::string_view message{data->pMessage}; if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { LOG_CRITICAL(Render_Vulkan, "{}", message); diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index 67183e64c..e04f7dfc6 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp @@ -100,8 +100,9 @@ struct Client::Impl { request.body = data; httplib::Response response; + httplib::Error error; - if (!cli->send(request, response)) { + if (!cli->send(request, response, error)) { LOG_ERROR(WebService, "{} to {} returned null", method, host + path); return WebResult{WebResult::Code::LibError, "Null response", ""}; } diff --git 
a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index a59b36e13..a1e726dc7 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -221,7 +221,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default // This must be in alphabetical order according to action name as it must have the same order as // UISetting::values.shortcuts, which is alphabetically ordered. // clang-format off -const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{ +const std::array<UISettings::Shortcut, 18> Config::default_hotkeys{{ {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::WidgetWithChildrenShortcut}}, {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, @@ -236,6 +236,7 @@ const std::array<UISettings::Shortcut, 17> Config::default_hotkeys{{ {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}}, {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}}, {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, + {QStringLiteral("Toggle Framerate Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+U"), Qt::ApplicationShortcut}}, {QStringLiteral("Toggle Mouse Panning"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F9"), Qt::ApplicationShortcut}}, {QStringLiteral("Toggle Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+Z"), Qt::ApplicationShortcut}}, {QStringLiteral("Toggle Status Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+S"), Qt::WindowShortcut}}, @@ -647,6 +648,8 @@ void Config::ReadDebuggingValues() { 
ReadSetting(QStringLiteral("program_args"), QString{}).toString().toStdString(); Settings::values.dump_exefs = ReadSetting(QStringLiteral("dump_exefs"), false).toBool(); Settings::values.dump_nso = ReadSetting(QStringLiteral("dump_nso"), false).toBool(); + Settings::values.enable_fs_access_log = + ReadSetting(QStringLiteral("enable_fs_access_log"), false).toBool(); Settings::values.reporting_services = ReadSetting(QStringLiteral("reporting_services"), false).toBool(); Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); @@ -754,6 +757,8 @@ void Config::ReadCpuValues() { QStringLiteral("cpuopt_unsafe_unfuse_fma"), true); ReadSettingGlobal(Settings::values.cpuopt_unsafe_reduce_fp_error, QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true); + ReadSettingGlobal(Settings::values.cpuopt_unsafe_ignore_standard_fpcr, + QStringLiteral("cpuopt_unsafe_ignore_standard_fpcr"), true); ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan, QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true); ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check, @@ -807,7 +812,10 @@ void Config::ReadRendererValues() { QStringLiteral("use_asynchronous_gpu_emulation"), true); ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), true); + ReadSettingGlobal(Settings::values.accelerate_astc, QStringLiteral("accelerate_astc"), true); ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); + ReadSettingGlobal(Settings::values.disable_fps_limit, QStringLiteral("disable_fps_limit"), + false); ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), false); ReadSettingGlobal(Settings::values.use_asynchronous_shaders, @@ -1258,6 +1266,8 @@ void Config::SaveDebuggingValues() { QString::fromStdString(Settings::values.program_args), QString{}); WriteSetting(QStringLiteral("dump_exefs"), Settings::values.dump_exefs, false); 
WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); + WriteSetting(QStringLiteral("enable_fs_access_log"), Settings::values.enable_fs_access_log, + false); WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); WriteSetting(QStringLiteral("use_debug_asserts"), Settings::values.use_debug_asserts, false); WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false); @@ -1334,6 +1344,8 @@ void Config::SaveCpuValues() { Settings::values.cpuopt_unsafe_unfuse_fma, true); WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), Settings::values.cpuopt_unsafe_reduce_fp_error, true); + WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_ignore_standard_fpcr"), + Settings::values.cpuopt_unsafe_ignore_standard_fpcr, true); WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), Settings::values.cpuopt_unsafe_inaccurate_nan, true); WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"), @@ -1388,7 +1400,10 @@ void Config::SaveRendererValues() { Settings::values.use_asynchronous_gpu_emulation, true); WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, true); + WriteSettingGlobal(QStringLiteral("accelerate_astc"), Settings::values.accelerate_astc, true); WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); + WriteSettingGlobal(QStringLiteral("disable_fps_limit"), Settings::values.disable_fps_limit, + false); WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, false); WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index ce3355588..3c1de0ac9 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -42,7 +42,7 @@ public: default_mouse_buttons; static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> 
default_keyboard_keys; static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; - static const std::array<UISettings::Shortcut, 17> default_hotkeys; + static const std::array<UISettings::Shortcut, 18> default_hotkeys; private: void Initialize(const std::string& config_name); diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp index 22219cbad..13db2ba98 100644 --- a/src/yuzu/configuration/configure_cpu.cpp +++ b/src/yuzu/configuration/configure_cpu.cpp @@ -34,12 +34,15 @@ void ConfigureCpu::SetConfiguration() { ui->accuracy->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock); ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); + ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock); ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); ui->cpuopt_unsafe_reduce_fp_error->setChecked( Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()); + ui->cpuopt_unsafe_ignore_standard_fpcr->setChecked( + Settings::values.cpuopt_unsafe_ignore_standard_fpcr.GetValue()); ui->cpuopt_unsafe_inaccurate_nan->setChecked( Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); ui->cpuopt_unsafe_fastmem_check->setChecked( @@ -84,6 +87,9 @@ void ConfigureCpu::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_reduce_fp_error, ui->cpuopt_unsafe_reduce_fp_error, cpuopt_unsafe_reduce_fp_error); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_standard_fpcr, + ui->cpuopt_unsafe_ignore_standard_fpcr, + cpuopt_unsafe_ignore_standard_fpcr); ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan, ui->cpuopt_unsafe_inaccurate_nan, cpuopt_unsafe_inaccurate_nan); @@ -137,6 +143,9 @@ void 
ConfigureCpu::SetupPerGameUI() { ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_reduce_fp_error, Settings::values.cpuopt_unsafe_reduce_fp_error, cpuopt_unsafe_reduce_fp_error); + ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_standard_fpcr, + Settings::values.cpuopt_unsafe_ignore_standard_fpcr, + cpuopt_unsafe_ignore_standard_fpcr); ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan, Settings::values.cpuopt_unsafe_inaccurate_nan, cpuopt_unsafe_inaccurate_nan); diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h index 57ff2772a..b2b5f1671 100644 --- a/src/yuzu/configuration/configure_cpu.h +++ b/src/yuzu/configuration/configure_cpu.h @@ -40,6 +40,7 @@ private: ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma; ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error; + ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr; ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check; }; diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui index 31ef9e3f5..0e296d4e5 100644 --- a/src/yuzu/configuration/configure_cpu.ui +++ b/src/yuzu/configuration/configure_cpu.ui @@ -112,6 +112,18 @@ </widget> </item> <item> + <widget class="QCheckBox" name="cpuopt_unsafe_ignore_standard_fpcr"> + <property name="toolTip"> + <string> + <div>This option improves the speed of 32 bits ASIMD floating-point functions by running with incorrect rounding modes.</div> + </string> + </property> + <property name="text"> + <string>Faster ASIMD instructions (32 bits only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="cpuopt_unsafe_inaccurate_nan"> <property name="toolTip"> <string> diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index b207e07cb..15d6a5ad7 100644 --- 
a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp @@ -28,17 +28,21 @@ ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::Co ConfigureDebug::~ConfigureDebug() = default; void ConfigureDebug::SetConfiguration() { - ui->toggle_console->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + + ui->toggle_console->setEnabled(runtime_lock); ui->toggle_console->setChecked(UISettings::values.show_console); ui->log_filter_edit->setText(QString::fromStdString(Settings::values.log_filter)); ui->homebrew_args_edit->setText(QString::fromStdString(Settings::values.program_args)); + ui->fs_access_log->setEnabled(runtime_lock); + ui->fs_access_log->setChecked(Settings::values.enable_fs_access_log); ui->reporting_services->setChecked(Settings::values.reporting_services); ui->quest_flag->setChecked(Settings::values.quest_flag); ui->use_debug_asserts->setChecked(Settings::values.use_debug_asserts); ui->use_auto_stub->setChecked(Settings::values.use_auto_stub); - ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->enable_graphics_debugging->setEnabled(runtime_lock); ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); - ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->disable_macro_jit->setEnabled(runtime_lock); ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit); ui->extended_logging->setChecked(Settings::values.extended_logging); } @@ -47,6 +51,7 @@ void ConfigureDebug::ApplyConfiguration() { UISettings::values.show_console = ui->toggle_console->isChecked(); Settings::values.log_filter = ui->log_filter_edit->text().toStdString(); Settings::values.program_args = ui->homebrew_args_edit->text().toStdString(); + Settings::values.enable_fs_access_log = ui->fs_access_log->isChecked(); Settings::values.reporting_services = 
ui->reporting_services->isChecked(); Settings::values.quest_flag = ui->quest_flag->isChecked(); Settings::values.use_debug_asserts = ui->use_debug_asserts->isChecked(); diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index c9e60ee08..c8087542f 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui @@ -144,10 +144,17 @@ <item> <widget class="QGroupBox" name="groupBox_5"> <property name="title"> - <string>Dump</string> + <string>Debugging</string> </property> <layout class="QVBoxLayout" name="verticalLayout_7"> <item> + <widget class="QCheckBox" name="fs_access_log"> + <property name="text"> + <string>Enable FS Access Log</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="reporting_services"> <property name="text"> <string>Enable Verbose Reporting Services</string> diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index fb9ec093c..41a69d9b8 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -70,10 +70,12 @@ void ConfigureGraphics::SetConfiguration() { ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock); + ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( Settings::values.use_asynchronous_gpu_emulation.GetValue()); ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); + ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); @@ -118,6 +120,8 @@ void ConfigureGraphics::ApplyConfiguration() { 
use_asynchronous_gpu_emulation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, ui->use_nvdec_emulation, use_nvdec_emulation); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, + accelerate_astc); if (Settings::IsConfiguringGlobal()) { // Guard if during game and set to game-specific value @@ -254,6 +258,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); + ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); @@ -269,6 +274,8 @@ void ConfigureGraphics::SetupPerGameUI() { ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); ConfigurationShared::SetColoredTristate( ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); + ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, + accelerate_astc); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, Settings::values.use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index c162048a2..6418115cf 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -47,6 +47,7 @@ private: QColor bg_color; ConfigurationShared::CheckState use_nvdec_emulation; + ConfigurationShared::CheckState accelerate_astc; ConfigurationShared::CheckState use_disk_shader_cache; ConfigurationShared::CheckState use_asynchronous_gpu_emulation; diff --git a/src/yuzu/configuration/configure_graphics.ui 
b/src/yuzu/configuration/configure_graphics.ui index ab0bd4d77..5b999d84d 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -105,6 +105,13 @@ </widget> </item> <item> + <widget class="QCheckBox" name="accelerate_astc"> + <property name="text"> + <string>Accelerate ASTC texture decoding</string> + </property> + </widget> + </item> + <item> <widget class="QWidget" name="fullscreen_mode_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 35bf9c6be..1af87e849 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -28,6 +28,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); + ui->disable_fps_limit->setChecked(Settings::values.disable_fps_limit.GetValue()); ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); @@ -57,6 +58,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, ui->anisotropic_filtering_combobox); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.disable_fps_limit, + ui->disable_fps_limit, disable_fps_limit); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, ui->use_assembly_shaders, use_assembly_shaders); 
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, @@ -97,6 +100,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); + ui->disable_fps_limit->setEnabled(Settings::values.disable_fps_limit.UsingGlobal()); ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); @@ -108,6 +112,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { } ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); + ConfigurationShared::SetColoredTristate(ui->disable_fps_limit, + Settings::values.disable_fps_limit, disable_fps_limit); ConfigurationShared::SetColoredTristate( ui->use_assembly_shaders, Settings::values.use_assembly_shaders, use_assembly_shaders); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index e61b571c7..c19c34851 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -35,6 +35,7 @@ private: std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; ConfigurationShared::CheckState use_vsync; + ConfigurationShared::CheckState disable_fps_limit; ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 846a30586..824cb2fb2 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ 
-77,6 +77,24 @@ </widget> </item> <item> + <widget class="QCheckBox" name="disable_fps_limit"> + <property name="enabled"> + <bool>true</bool> + </property> + <property name="toolTip"> + <string> + <html><head/><body> + <p>Presents guest frames as they become available, disabling the FPS limit in most titles.</p> + <p>NOTE: Will cause instabilities.</p> + </body></html> + </string> + </property> + <property name="text"> + <string>Disable framerate limit (experimental)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="use_assembly_shaders"> <property name="toolTip"> <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index be8933c5c..d4c7d2c0b 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -1025,7 +1025,11 @@ void GMainWindow::InitializeHotkeys() { connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this), &QShortcut::activated, this, [] { Settings::values.audio_muted = !Settings::values.audio_muted; }); - + connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Framerate Limit"), this), + &QShortcut::activated, this, [] { + Settings::values.disable_fps_limit.SetValue( + !Settings::values.disable_fps_limit.GetValue()); + }); connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Toggle Mouse Panning"), this), &QShortcut::activated, this, [&] { Settings::values.mouse_panning = !Settings::values.mouse_panning; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 63f368fe5..60bf66ec0 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -443,12 +443,16 @@ void Config::ReadValues() { sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true)); Settings::values.use_vsync.SetValue( static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1))); + 
Settings::values.disable_fps_limit.SetValue( + sdl2_config->GetBoolean("Renderer", "disable_fps_limit", false)); Settings::values.use_assembly_shaders.SetValue( sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true)); Settings::values.use_asynchronous_shaders.SetValue( sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); - Settings::values.use_asynchronous_shaders.SetValue( - sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); + Settings::values.use_nvdec_emulation.SetValue( + sdl2_config->GetBoolean("Renderer", "use_nvdec_emulation", true)); + Settings::values.accelerate_astc.SetValue( + sdl2_config->GetBoolean("Renderer", "accelerate_astc", true)); Settings::values.use_fast_gpu_time.SetValue( sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); @@ -477,6 +481,8 @@ void Config::ReadValues() { Settings::values.program_args = sdl2_config->Get("Debugging", "program_args", ""); Settings::values.dump_exefs = sdl2_config->GetBoolean("Debugging", "dump_exefs", false); Settings::values.dump_nso = sdl2_config->GetBoolean("Debugging", "dump_nso", false); + Settings::values.enable_fs_access_log = + sdl2_config->GetBoolean("Debugging", "enable_fs_access_log", false); Settings::values.reporting_services = sdl2_config->GetBoolean("Debugging", "reporting_services", false); Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index f48d935a1..c960ccf89 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -194,6 +194,14 @@ use_assembly_shaders = # 0 (default): Off, 1: On use_asynchronous_shaders = +# Enable NVDEC emulation. +# 0: Off, 1 (default): On +use_nvdec_emulation = + +# Accelerate ASTC texture decoding. 
+# 0: Off, 1 (default): On +accelerate_astc = + # Turns on the frame limiter, which will limit frames output to the target game speed # 0: Off, 1: On (default) use_frame_limit = @@ -252,7 +260,10 @@ swap_screen = [Audio] # Which audio output engine to use. -# auto (default): Auto-select, null: No audio output, cubeb: Cubeb audio engine (if available) +# auto (default): Auto-select +# cubeb: Cubeb audio engine (if available) +# sdl2: SDL2 audio engine (if available) +# null: No audio output output_engine = # Whether or not to enable the audio-stretching post-processing effect. @@ -338,6 +349,8 @@ record_frame_times = dump_exefs=false # Determines whether or not yuzu will dump all NSOs it attempts to load while loading them dump_nso=false +# Determines whether or not yuzu will save the filesystem access log. +enable_fs_access_log=false # Determines whether or not yuzu will report to the game that the emulated console is in Kiosk Mode # false: Retail/Normal Mode (default), true: Kiosk Mode quest_flag = @@ -349,6 +362,9 @@ use_debug_asserts = use_auto_stub = # Enables/Disables the macro JIT compiler disable_macro_jit=false +# Presents guest frames as they become available. Experimental. +# false: Disabled (default), true: Enabled +disable_fps_limit=false [WebService] # Whether or not to enable telemetry |