diff options
Diffstat (limited to 'src')
42 files changed, 414 insertions, 195 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 57922b51c..b18a2a2f5 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -53,6 +53,8 @@ add_library(common STATIC div_ceil.h dynamic_library.cpp dynamic_library.h + error.cpp + error.h fiber.cpp fiber.h fs/file.cpp @@ -88,7 +90,6 @@ add_library(common STATIC microprofile.cpp microprofile.h microprofileui.h - misc.cpp nvidia_flags.cpp nvidia_flags.h page_table.cpp diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 53bd7da60..1e74d6930 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -4,9 +4,8 @@ #pragma once -#include <algorithm> #include <array> -#include <string> +#include <iterator> #if !defined(ARCHITECTURE_x86_64) #include <cstdlib> // for exit @@ -49,16 +48,6 @@ __declspec(dllimport) void __stdcall DebugBreak(void); #endif // _MSC_VER ndef -// Generic function to get last error message. -// Call directly after the command or use the error num. -// This function might change the error code. -// Defined in misc.cpp. -[[nodiscard]] std::string GetLastErrorMsg(); - -// Like GetLastErrorMsg(), but passing an explicit error code. -// Defined in misc.cpp. -[[nodiscard]] std::string NativeErrorToString(int e); - #define DECLARE_ENUM_FLAG_OPERATORS(type) \ [[nodiscard]] constexpr type operator|(type a, type b) noexcept { \ using T = std::underlying_type_t<type>; \ diff --git a/src/common/misc.cpp b/src/common/error.cpp index 495385b9e..d4455e310 100644 --- a/src/common/misc.cpp +++ b/src/common/error.cpp @@ -10,7 +10,9 @@ #include <cstring> #endif -#include "common/common_funcs.h" +#include "common/error.h" + +namespace Common { std::string NativeErrorToString(int e) { #ifdef _WIN32 @@ -50,3 +52,5 @@ std::string GetLastErrorMsg() { return NativeErrorToString(errno); #endif } + +} // namespace Common diff --git a/src/common/error.h b/src/common/error.h new file mode 100644 index 000000000..e084d4b0f --- /dev/null +++ b/src/common/error.h @@ -0,0 +1,21 @@ +// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <string> + +namespace Common { + +// Generic function to get last error message. +// Call directly after the command or use the error num. +// This function might change the error code. +// Defined in error.cpp. +[[nodiscard]] std::string GetLastErrorMsg(); + +// Like GetLastErrorMsg(), but passing an explicit error code. +// Defined in error.cpp. +[[nodiscard]] std::string NativeErrorToString(int e); + +} // namespace Common diff --git a/src/common/settings.cpp b/src/common/settings.cpp index fd3b639cd..0d2df80a8 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -54,7 +54,7 @@ void LogSettings() { log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); - log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); + log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); @@ -136,7 +136,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_disk_shader_cache.SetGlobal(true); values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); - values.use_nvdec_emulation.SetGlobal(true); + values.nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); values.shader_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ec4d381e8..b7195670b 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -48,6 +48,12 @@ enum class FullscreenMode : u32 { Exclusive = 1, }; +enum class NvdecEmulation : u32 { + Off = 0, + CPU = 1, + GPU = 2, +}; + /** The BasicSetting class is a simple resource manager. It defines a label and default value * alongside the actual value of the setting for simpler and less-error prone use with frontend * configurations. Setting a default value and label is required, though subclasses may deviate from @@ -466,7 +472,7 @@ struct Values { RangedSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal, GPUAccuracy::Extreme, "gpu_accuracy"}; Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"}; - Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"}; + Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"}; Setting<bool> accelerate_astc{true, "accelerate_astc"}; Setting<bool> use_vsync{true, "use_vsync"}; BasicRangedSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"}; diff --git a/src/common/thread.cpp b/src/common/thread.cpp index d2c1ac60d..946a1114d 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -2,7 +2,9 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "common/common_funcs.h" +#include <string> + +#include "common/error.h" #include "common/logging/log.h" #include "common/thread.h" #ifdef __APPLE__ @@ -21,8 +23,6 @@ #include <unistd.h> #endif -#include <string> - #ifdef __FreeBSD__ #define cpu_set_t cpuset_t #endif diff --git a/src/core/file_sys/kernel_executable.h b/src/core/file_sys/kernel_executable.h index 044c554d3..79ca82f8b 100644 --- a/src/core/file_sys/kernel_executable.h +++ b/src/core/file_sys/kernel_executable.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <string> #include <vector> #include "common/common_funcs.h" diff --git a/src/core/hle/api_version.h b/src/core/hle/api_version.h index 43d5670a9..626e30753 100644 --- a/src/core/hle/api_version.h +++ b/src/core/hle/api_version.h @@ -28,13 +28,20 @@ constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0"; // Atmosphere version constants. -constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 0; -constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 19; -constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 5; +constexpr u8 ATMOSPHERE_RELEASE_VERSION_MAJOR = 1; +constexpr u8 ATMOSPHERE_RELEASE_VERSION_MINOR = 0; +constexpr u8 ATMOSPHERE_RELEASE_VERSION_MICRO = 0; + +constexpr u32 AtmosphereTargetFirmwareWithRevision(u8 major, u8 minor, u8 micro, u8 rev) { + return u32{major} << 24 | u32{minor} << 16 | u32{micro} << 8 | u32{rev}; +} + +constexpr u32 AtmosphereTargetFirmware(u8 major, u8 minor, u8 micro) { + return AtmosphereTargetFirmwareWithRevision(major, minor, micro, 0); +} constexpr u32 GetTargetFirmware() { - return u32{HOS_VERSION_MAJOR} << 24 | u32{HOS_VERSION_MINOR} << 16 | - u32{HOS_VERSION_MICRO} << 8 | 0U; + return AtmosphereTargetFirmware(HOS_VERSION_MAJOR, HOS_VERSION_MINOR, HOS_VERSION_MICRO); } } // namespace HLE::ApiVersion diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 3a6db0b1c..901d43da9 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <functional> #include <memory> #include <string> #include <unordered_map> diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index ce6065db2..a33e47d0b 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -42,15 +42,14 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, const Common::Rectangle<int>& crop_rect) { - VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); + const VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", addr, offset, width, height, stride, format); - using PixelFormat = Tegra::FramebufferConfig::PixelFormat; - const Tegra::FramebufferConfig framebuffer{ - addr, offset, width, height, stride, static_cast<PixelFormat>(format), - transform, crop_rect}; + const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format); + const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, + stride, pixel_format, transform, crop_rect}; system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 759247eb0..78de3f354 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -42,7 +42,9 @@ struct IGBPBuffer { u32_le index; INSERT_PADDING_WORDS(3); u32_le gpu_buffer_id; - INSERT_PADDING_WORDS(17); + INSERT_PADDING_WORDS(6); + u32_le external_format; + INSERT_PADDING_WORDS(10); u32_le nvmap_handle; u32_le offset; INSERT_PADDING_WORDS(60); diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 00bff8caf..3ead813b0 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -298,7 +298,7 @@ void NVFlinger::Compose() { auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0"); ASSERT(nvdisp); - nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format, + nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.external_format, igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer->get().transform, buffer->get().crop_rect); diff --git a/src/core/network/network.cpp b/src/core/network/network.cpp index 4732d4485..72eea52f0 100644 --- a/src/core/network/network.cpp +++ b/src/core/network/network.cpp @@ -7,7 +7,8 @@ #include <limits> #include <utility> #include <vector> -#include "common/common_funcs.h" + +#include "common/error.h" #ifdef _WIN32 #include <winsock2.h> @@ -223,7 +224,7 @@ Errno GetAndLogLastError() { if (err == Errno::AGAIN) { return err; } - LOG_ERROR(Network, "Socket operation error: {}", NativeErrorToString(e)); + LOG_ERROR(Network, "Socket operation error: {}", Common::NativeErrorToString(e)); return err; } diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 5a8cfd301..1f1607998 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) { return "Unknown"; } +static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) { + switch (backend) { + case Settings::NvdecEmulation::Off: + return "Off"; + case Settings::NvdecEmulation::CPU: + return "CPU"; + case Settings::NvdecEmulation::GPU: + return "GPU"; + } + return "Unknown"; +} + u64 GetTelemetryId() { u64 telemetry_id{}; const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; @@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation.GetValue()); - AddField(field_type, "Renderer_UseNvdecEmulation", - Settings::values.use_nvdec_emulation.GetValue()); + AddField(field_type, "Renderer_NvdecEmulation", + TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_ShaderBackend", diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2f6cdd216..269db21a5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -231,6 +231,7 @@ endif() target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) +target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f798a0053..61966cbfe 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -5,6 +5,7 @@ #include <fstream> #include <vector> #include "common/assert.h" +#include "common/settings.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" #include "video_core/command_classes/codecs/vp9.h" @@ -16,108 +17,146 @@ extern "C" { } namespace Tegra { -#if defined(LIBVA_FOUND) -// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license namespace { -constexpr std::array<const char*, 2> VAAPI_DRIVERS = { - "i915", - "amdgpu", -}; +constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; +constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; + +void AVPacketDeleter(AVPacket* ptr) { + av_packet_free(&ptr); +} -AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) { +using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>; + +AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { - if (*p == AV_PIX_FMT_VAAPI) { - return AV_PIX_FMT_VAAPI; + if (*p == av_codec_ctx->pix_fmt) { + return av_codec_ctx->pix_fmt; } } LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); - return *pix_fmts; + av_buffer_unref(&av_codec_ctx->hw_device_ctx); + av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT; + return PREFERRED_CPU_FMT; +} +} // namespace + +void AVFrameDeleter(AVFrame* ptr) { + av_frame_free(&ptr); } -bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) { +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), + vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} + +Codec::~Codec() { + if (!initialized) { + return; + } + // Free libav memory + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); +} + +bool Codec::CreateGpuAvDevice() { +#if defined(LIBVA_FOUND) + static constexpr std::array<const char*, 3> VAAPI_DRIVERS = { + "i915", + "iHD", + "amdgpu", + }; AVDictionary* hwdevice_options = nullptr; av_dict_set(&hwdevice_options, "connection_type", "drm", 0); for (const auto& driver : VAAPI_DRIVERS) { av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); - const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI, + const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, nullptr, hwdevice_options, 0); if (hwdevice_error >= 0) { LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); av_dict_free(&hwdevice_options); + av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; return true; } LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); } LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); av_dict_free(&hwdevice_options); - return false; -} -} // namespace #endif - -void AVFrameDeleter(AVFrame* ptr) { - av_frame_free(&ptr); + static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; + static constexpr std::array GPU_DECODER_TYPES{ + AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, +#else + AV_HWDEVICE_TYPE_VDPAU, +#endif + }; + for (const auto& type : GPU_DECODER_TYPES) { + const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); + if (hwdevice_res < 0) { + LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", + av_hwdevice_get_type_name(type), hwdevice_res); + continue; + } + for (int i = 0;; i++) { + const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); + if (!config) { + LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.", + av_codec->name, av_hwdevice_get_type_name(type)); + break; + } + if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { + av_codec_ctx->pix_fmt = config->pix_fmt; + LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + return true; + } + } + } + return false; } -Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) - : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), - vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} - -Codec::~Codec() { - if (!initialized) { - return; - } - // Free libav memory - avcodec_send_packet(av_codec_ctx, nullptr); - AVFrame* av_frame = av_frame_alloc(); - avcodec_receive_frame(av_codec_ctx, av_frame); - avcodec_flush_buffers(av_codec_ctx); - av_frame_free(&av_frame); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); +void Codec::InitializeAvCodecContext() { + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); } -void Codec::InitializeHwdec() { - // Prioritize integrated GPU to mitigate bandwidth bottlenecks -#if defined(LIBVA_FOUND) - if (CreateVaapiHwdevice(&av_hw_device)) { - const auto hw_device_ctx = av_buffer_ref(av_hw_device); - ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); - av_codec_ctx->hw_device_ctx = hw_device_ctx; - av_codec_ctx->get_format = GetHwFormat; +void Codec::InitializeGpuDecoder() { + if (!CreateGpuAvDevice()) { + av_buffer_unref(&av_gpu_decoder); return; } -#endif - // TODO more GPU accelerated decoders + auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder); + ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); + av_codec_ctx->hw_device_ctx = hw_device_ctx; + av_codec_ctx->get_format = GetGpuFormat; } void Codec::Initialize() { - AVCodecID codec; - switch (current_codec) { - case NvdecCommon::VideoCodec::H264: - codec = AV_CODEC_ID_H264; - break; - case NvdecCommon::VideoCodec::Vp9: - codec = AV_CODEC_ID_VP9; - break; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + const AVCodecID codec = [&] { + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case NvdecCommon::VideoCodec::Vp9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + return AV_CODEC_ID_NONE; + } + }(); + av_codec = avcodec_find_decoder(codec); + + InitializeAvCodecContext(); + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) { + InitializeGpuDecoder(); + } + if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); return; } - av_codec = avcodec_find_decoder(codec); - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - InitializeHwdec(); if (!av_codec_ctx->hw_device_ctx) { LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); } - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); - return; - } initialized = true; } @@ -133,6 +172,9 @@ void Codec::Decode() { if (is_first_frame) { Initialize(); } + if (!initialized) { + return; + } bool vp9_hidden_frame = false; std::vector<u8> frame_data; if (current_codec == NvdecCommon::VideoCodec::H264) { @@ -141,50 +183,48 @@ void Codec::Decode() { frame_data = vp9_decoder->ComposeFrameHeader(state); vp9_hidden_frame = vp9_decoder->WasFrameHidden(); } - AVPacket packet{}; - av_init_packet(&packet); - packet.data = frame_data.data(); - packet.size = static_cast<s32>(frame_data.size()); - if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret); + AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; + if (!packet) { + LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); + return; + } + packet->data = frame_data.data(); + packet->size = static_cast<s32>(frame_data.size()); + if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { + LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); return; } // Only receive/store visible frames if (vp9_hidden_frame) { return; } - AVFrame* hw_frame = av_frame_alloc(); - AVFrame* sw_frame = hw_frame; - ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed"); - if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) { + AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; + AVFramePtr final_frame{nullptr, AVFrameDeleter}; + ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); + if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); - av_frame_free(&hw_frame); return; } - if (!hw_frame->width || !hw_frame->height) { + if (initial_frame->width == 0 || initial_frame->height == 0) { LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); - av_frame_free(&hw_frame); return; } -#if defined(LIBVA_FOUND) - // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license - if (hw_frame->format == AV_PIX_FMT_VAAPI) { - sw_frame = av_frame_alloc(); - ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed"); + if (av_codec_ctx->hw_device_ctx) { + final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp // because Intel drivers crash unless using AV_PIX_FMT_NV12 - sw_frame->format = AV_PIX_FMT_NV12; - const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0); - ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret); - av_frame_free(&hw_frame); + final_frame->format = PREFERRED_GPU_FMT; + const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); + ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); + } else { + final_frame = std::move(initial_frame); } -#endif - if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) { - UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format); - av_frame_free(&sw_frame); + if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) { + UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter}); + av_frames.push(std::move(final_frame)); if (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 71936203f..f9a80886f 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -5,6 +5,7 @@ #pragma once #include <memory> +#include <string_view> #include <queue> #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -50,18 +51,23 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec [[nodiscard]] std::string_view GetCurrentCodecName() const; private: - void InitializeHwdec(); + void InitializeAvCodecContext(); + + void InitializeGpuDecoder(); + + bool CreateGpuAvDevice(); bool initialized{}; NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; AVCodec* av_codec{nullptr}; - AVBufferRef* av_hw_device{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; + AVBufferRef* av_gpu_decoder{nullptr}; GPU& gpu; const NvdecCommon::NvdecRegisters& state; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 5fb6d45ee..51ee14c13 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -95,7 +95,8 @@ const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegister const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); - writer.WriteUe(16); + // TODO (ameerj): Where do we get this number, it seems to be particular for each stream + writer.WriteUe(6); // Max number of reference frames writer.WriteBit(false); writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); writer.WriteUe(pic_height - 1); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index c60ed6453..dce00e829 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> + #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7c9b0d6db..9ff0a28cd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -164,7 +164,8 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { blit_screen.Recreate(); } const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); - scheduler.Flush(render_semaphore); + const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); + scheduler.Flush(render_semaphore, present_semaphore); scheduler.WaitWorker(); swapchain.Present(render_semaphore); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index cb0580182..888bc7392 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -358,7 +358,7 @@ void VKBlitScreen::CreateDescriptorPool() { void VKBlitScreen::CreateRenderPass() { const VkAttachmentDescription color_attachment{ .flags = 0, - .format = swapchain.GetImageFormat(), + .format = swapchain.GetImageViewFormat(), .samples = VK_SAMPLE_COUNT_1_BIT, .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 8e77e4796..adb557f60 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <mutex> #include <span> #include <vector> diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3ac18ea54..841a6b846 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -228,9 +228,7 @@ void RasterizerVulkan::Clear() { }; const u32 color_attachment = regs.clear_buffers.RT; - const auto attachment_aspect_mask = framebuffer->ImageRanges()[color_attachment].aspectMask; - const bool is_color_rt = (attachment_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; - if (use_color && is_color_rt) { + if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { VkClearValue clear_value; std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); @@ -248,12 +246,15 @@ void RasterizerVulkan::Clear() { return; } VkImageAspectFlags aspect_flags = 0; - if (use_depth) { + if (use_depth && framebuffer->HasAspectDepthBit()) { aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT; } - if (use_stencil) { + if (use_stencil && framebuffer->HasAspectStencilBit()) { aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; } + if (aspect_flags == 0) { + return; + } scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { VkClearAttachment attachment; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 4840962de..1d438787a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -55,14 +55,14 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(VkSemaphore semaphore) { - SubmitExecution(semaphore); +void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + SubmitExecution(signal_semaphore, wait_semaphore); AllocateNewContext(); } -void VKScheduler::Finish(VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { const u64 presubmit_tick = CurrentTick(); - SubmitExecution(semaphore); + SubmitExecution(signal_semaphore, wait_semaphore); WaitWorker(); Wait(presubmit_tick); AllocateNewContext(); @@ -171,37 +171,41 @@ void VKScheduler::AllocateWorkerCommandBuffer() { }); } -void VKScheduler::SubmitExecution(VkSemaphore semaphore) { +void VKScheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { EndPendingOperations(); InvalidateState(); const u64 signal_value = master_semaphore->NextTick(); - Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { + Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { cmdbuf.End(); - - const u32 num_signal_semaphores = semaphore ? 2U : 1U; - - const u64 wait_value = signal_value - 1; - const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + + const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, semaphore}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + }; const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreValueCount = 1, - .pWaitSemaphoreValues = &wait_value, + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), .signalSemaphoreValueCount = num_signal_semaphores, .pSignalSemaphoreValues = signal_values.data(), }; const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .pNext = &timeline_si, - .waitSemaphoreCount = 1, - .pWaitSemaphores = &timeline_semaphore, - .pWaitDstStageMask = &wait_stage_mask, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), .commandBufferCount = 1, .pCommandBuffers = cmdbuf.address(), .signalSemaphoreCount = num_signal_semaphores, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index cf39a2363..759ed5a48 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -34,10 +34,10 @@ public: ~VKScheduler(); /// Sends the current execution context to the GPU. - void Flush(VkSemaphore semaphore = nullptr); + void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(VkSemaphore semaphore = nullptr); + void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -191,7 +191,7 @@ private: void AllocateWorkerCommandBuffer(); - void SubmitExecution(VkSemaphore semaphore); + void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); void AllocateNewContext(); diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index d990eefba..aadf03cb0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -20,16 +20,15 @@ namespace Vulkan { namespace { -VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) { +VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats) { if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) { VkSurfaceFormatKHR format; format.format = VK_FORMAT_B8G8R8A8_UNORM; format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; return format; } - const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { - const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; - return format.format == request_format && + const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) { + return format.format == VK_FORMAT_B8G8R8A8_UNORM && format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; }); return found != formats.end() ? *found : formats[0]; @@ -107,14 +106,12 @@ void VKSwapchain::AcquireNextImage() { } void VKSwapchain::Present(VkSemaphore render_semaphore) { - const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; - const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; const VkPresentInfoKHR present_info{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = nullptr, - .waitSemaphoreCount = render_semaphore ? 2U : 1U, - .pWaitSemaphores = semaphores.data(), + .waitSemaphoreCount = render_semaphore ? 1U : 0U, + .pWaitSemaphores = &render_semaphore, .swapchainCount = 1, .pSwapchains = swapchain.address(), .pImageIndices = &image_index, @@ -145,7 +142,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; - const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; + const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; u32 requested_image_count{capabilities.minImageCount + 1}; @@ -180,6 +177,17 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); } + static constexpr std::array view_formats{VK_FORMAT_B8G8R8A8_UNORM, VK_FORMAT_B8G8R8A8_SRGB}; + VkImageFormatListCreateInfo format_list{ + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR, + .pNext = nullptr, + .viewFormatCount = static_cast<u32>(view_formats.size()), + .pViewFormats = view_formats.data(), + }; + if (device.IsKhrSwapchainMutableFormatEnabled()) { + format_list.pNext = std::exchange(swapchain_ci.pNext, &format_list); + swapchain_ci.flags |= VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR; + } // Request the size again to reduce the possibility of a TOCTOU race condition. const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); @@ -191,7 +199,7 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, images = swapchain.GetImages(); image_count = static_cast<u32>(images.size()); - image_format = surface_format.format; + image_view_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; } void VKSwapchain::CreateSemaphores() { @@ -207,7 +215,7 @@ void VKSwapchain::CreateImageViews() { .flags = 0, .image = {}, .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = image_format, + .format = image_view_format, .components = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 35c2cdc14..5bce41e21 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -68,8 +68,12 @@ public: return *image_views[index]; } - VkFormat GetImageFormat() const { - return image_format; + VkFormat GetImageViewFormat() const { + return image_view_format; + } + + VkSemaphore CurrentPresentSemaphore() const { + return *present_semaphores[frame_index]; } private: @@ -96,7 +100,7 @@ private: u32 image_index{}; u32 frame_index{}; - VkFormat image_format{}; + VkFormat image_view_format{}; VkExtent2D extent{}; bool current_srgb{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8f4df7122..ff979a7ac 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1186,9 +1186,12 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM renderpass_key.depth_format = depth_buffer->format; num_layers = std::max(num_layers, depth_buffer->range.extent.layers); images[num_images] = depth_buffer->ImageHandle(); - image_ranges[num_images] = MakeSubresourceRange(depth_buffer); + const VkImageSubresourceRange subresource_range = MakeSubresourceRange(depth_buffer); + image_ranges[num_images] = subresource_range; samples = depth_buffer->Samples(); ++num_images; + has_depth = (subresource_range.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0; + has_stencil = (subresource_range.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0; } else { renderpass_key.depth_format = PixelFormat::Invalid; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 5fe6b7ba3..6d5a68bfe 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -232,6 +232,18 @@ public: return image_ranges; } + [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { + return (image_ranges.at(index).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + } + + [[nodiscard]] bool HasAspectDepthBit() const noexcept { + return has_depth; + } + + [[nodiscard]] bool HasAspectStencilBit() const noexcept { + return has_stencil; + } + private: vk::Framebuffer framebuffer; VkRenderPass renderpass{}; @@ -241,6 +253,8 @@ private: u32 num_images = 0; std::array<VkImage, 9> images{}; std::array<VkImageSubresourceRange, 9> image_ranges{}; + bool has_depth{}; + bool has_stencil{}; }; struct TextureCacheParams { diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 8a4581c19..81a878bb2 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <filesystem> #include <fstream> #include <memory> diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 6180b8c0e..74cd3c9d8 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -4,6 +4,7 @@ #pragma once +#include <algorithm> #include <array> #include <bit> #include <concepts> diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 3b575db4d..cae543a51 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -37,7 +37,8 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer( namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); + const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); + const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); auto context = emu_window.CreateSharedContext(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 86ca4be54..24821c1a3 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -839,6 +839,8 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { bool has_khr_shader_float16_int8{}; bool has_khr_workgroup_memory_explicit_layout{}; bool has_khr_pipeline_executable_properties{}; + bool has_khr_image_format_list{}; + bool has_khr_swapchain_mutable_format{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; bool has_ext_custom_border_color{}; @@ -888,6 +890,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); test(has_khr_workgroup_memory_explicit_layout, VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); + test(has_khr_image_format_list, VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false); + test(has_khr_swapchain_mutable_format, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, + false); test(has_ext_line_rasterization, VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, false); if (Settings::values.enable_nsight_aftermath) { test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, @@ -1066,6 +1071,11 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { khr_pipeline_executable_properties = true; } } + if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { + extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); + extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); + khr_swapchain_mutable_format = true; + } if (khr_push_descriptor) { VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 234d74129..5599c38c5 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -224,6 +224,11 @@ public: return khr_pipeline_executable_properties; } + /// Returns true if VK_KHR_swapchain_mutable_format is enabled. + bool IsKhrSwapchainMutableFormatEnabled() const { + return khr_swapchain_mutable_format; + } + /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { return khr_workgroup_memory_explicit_layout; @@ -390,6 +395,7 @@ private: bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor. bool khr_pipeline_executable_properties{}; ///< Support for executable properties. + bool khr_swapchain_mutable_format{}; ///< Support for VK_KHR_swapchain_mutable_format. bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax. bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 85d292bcc..8744d8e5d 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -812,7 +812,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_disk_shader_cache); ReadGlobalSetting(Settings::values.gpu_accuracy); ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - ReadGlobalSetting(Settings::values.use_nvdec_emulation); + ReadGlobalSetting(Settings::values.nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); ReadGlobalSetting(Settings::values.shader_backend); @@ -1349,7 +1349,10 @@ void Config::SaveRendererValues() { static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()), Settings::values.gpu_accuracy.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - WriteGlobalSetting(Settings::values.use_nvdec_emulation); + WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()), + static_cast<u32>(Settings::values.nvdec_emulation.GetValue(global)), + static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()), + Settings::values.nvdec_emulation.UsingGlobal()); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 9555f4498..4733227b6 100644 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -182,5 +182,6 @@ private: Q_DECLARE_METATYPE(Settings::CPUAccuracy); Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::FullscreenMode); +Q_DECLARE_METATYPE(Settings::NvdecEmulation); Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 37e896258..c594164be 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() { ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); - ui->use_nvdec_emulation->setEnabled(runtime_lock); + ui->nvdec_emulation_widget->setEnabled(runtime_lock); ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( Settings::values.use_asynchronous_gpu_emulation.GetValue()); - ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); ui->fullscreen_mode_combobox->setCurrentIndex( static_cast<int>(Settings::values.fullscreen_mode.GetValue())); + ui->nvdec_emulation->setCurrentIndex( + static_cast<int>(Settings::values.nvdec_emulation.GetValue())); ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation, + &Settings::values.nvdec_emulation); + ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget, + !Settings::values.nvdec_emulation.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, &Settings::values.fullscreen_mode); ConfigurationShared::SetHighlight(ui->fullscreen_mode_label, @@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, ui->use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, - ui->use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, accelerate_astc); @@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.nvdec_emulation.UsingGlobal()) { + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } if (Settings::values.shader_backend.UsingGlobal()) { Settings::values.shader_backend.SetValue(shader_backend); } @@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() { } } + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + } else { + Settings::values.nvdec_emulation.SetGlobal(false); + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } + if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.bg_red.SetGlobal(true); Settings::values.bg_green.SetGlobal(true); @@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { ConfigurationShared::USE_GLOBAL_OFFSET); } +Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const { + if (Settings::IsConfiguringGlobal()) { + return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex()); + } + + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + return Settings::values.nvdec_emulation.GetValue(); + } + Settings::values.nvdec_emulation.SetGlobal(false); + return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex() - + ConfigurationShared::USE_GLOBAL_OFFSET); +} + void ConfigureGraphics::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal()); @@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); - ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); + ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); @@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredTristate( ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); - ConfigurationShared::SetColoredTristate( - ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, accelerate_astc); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, @@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() { static_cast<int>(Settings::values.fullscreen_mode.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); + ConfigurationShared::InsertGlobalItem( + ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true))); } diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index c866b911b..7d7ac329d 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -43,6 +43,7 @@ private: void SetupPerGameUI(); Settings::RendererBackend GetCurrentGraphicsBackend() const; + Settings::NvdecEmulation GetCurrentNvdecEmulation() const; std::unique_ptr<Ui::ConfigureGraphics> ui; QColor bg_color; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 43f1887d1..1a12cfa4d 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -168,13 +168,6 @@ </widget> </item> <item> - <widget class="QCheckBox" name="use_nvdec_emulation"> - <property name="text"> - <string>Use NVDEC emulation</string> - </property> - </widget> - </item> - <item> <widget class="QCheckBox" name="accelerate_astc"> <property name="text"> <string>Accelerate ASTC texture decoding</string> @@ -182,6 +175,50 @@ </widget> </item> <item> + <widget class="QWidget" name="nvdec_emulation_widget" native="true"> + <layout class="QHBoxLayout" name="nvdec_emulation_layout"> + <property name="leftMargin"> + <number>0</number> + </property> + <property name="topMargin"> + <number>0</number> + </property> + <property name="rightMargin"> + <number>0</number> + </property> + <property name="bottomMargin"> + <number>0</number> + </property> + <item> + <widget class="QLabel" name="nvdec_emulation_label"> + <property name="text"> + <string>NVDEC emulation:</string> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="nvdec_emulation"> + <item> + <property name="text"> + <string>Disabled</string> + </property> + </item> + <item> + <property name="text"> + <string>CPU Decoding</string> + </property> + </item> + <item> + <property name="text"> + <string>GPU Decoding</string> + </property> + </item> + </widget> + </item> + </layout> + </widget> + </item> + <item> <widget class="QWidget" name="fullscreen_mode_layout" native="true"> <layout class="QHBoxLayout" name="horizontalLayout_1"> <property name="leftMargin"> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 757dd1ea0..891f7be6f 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -465,7 +465,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.disable_fps_limit); ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); - ReadSetting("Renderer", Settings::values.use_nvdec_emulation); + ReadSetting("Renderer", Settings::values.nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index e02eceb99..72f3213fb 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -261,9 +261,9 @@ shader_backend = # 0 (default): Off, 1: On use_asynchronous_shaders = -# Enable NVDEC emulation. -# 0: Off, 1 (default): On -use_nvdec_emulation = +# NVDEC emulation. +# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding +nvdec_emulation = # Accelerate ASTC texture decoding. # 0: Off, 1 (default): On |