diff options
Diffstat (limited to 'src')
53 files changed, 1540 insertions, 798 deletions
diff --git a/src/core/file_sys/fsmitm_romfsbuild.cpp b/src/core/file_sys/fsmitm_romfsbuild.cpp index d126ae8dd..2aff2708a 100644 --- a/src/core/file_sys/fsmitm_romfsbuild.cpp +++ b/src/core/file_sys/fsmitm_romfsbuild.cpp @@ -240,7 +240,7 @@ RomFSBuildContext::RomFSBuildContext(VirtualDir base_, VirtualDir ext_) RomFSBuildContext::~RomFSBuildContext() = default; -std::map<u64, VirtualFile> RomFSBuildContext::Build() { +std::multimap<u64, VirtualFile> RomFSBuildContext::Build() { const u64 dir_hash_table_entry_count = romfs_get_hash_table_count(num_dirs); const u64 file_hash_table_entry_count = romfs_get_hash_table_count(num_files); dir_hash_table_size = 4 * dir_hash_table_entry_count; @@ -294,7 +294,7 @@ std::map<u64, VirtualFile> RomFSBuildContext::Build() { cur_dir->parent->child = cur_dir; } - std::map<u64, VirtualFile> out; + std::multimap<u64, VirtualFile> out; // Populate file tables. for (const auto& it : files) { diff --git a/src/core/file_sys/fsmitm_romfsbuild.h b/src/core/file_sys/fsmitm_romfsbuild.h index a62502193..049de180b 100644 --- a/src/core/file_sys/fsmitm_romfsbuild.h +++ b/src/core/file_sys/fsmitm_romfsbuild.h @@ -43,7 +43,7 @@ public: ~RomFSBuildContext(); // This finalizes the context. - std::map<u64, VirtualFile> Build(); + std::multimap<u64, VirtualFile> Build(); private: VirtualDir base; diff --git a/src/core/file_sys/vfs_concat.cpp b/src/core/file_sys/vfs_concat.cpp index 16d801c0c..e0ff70174 100644 --- a/src/core/file_sys/vfs_concat.cpp +++ b/src/core/file_sys/vfs_concat.cpp @@ -11,7 +11,7 @@ namespace FileSys { -static bool VerifyConcatenationMapContinuity(const std::map<u64, VirtualFile>& map) { +static bool VerifyConcatenationMapContinuity(const std::multimap<u64, VirtualFile>& map) { const auto last_valid = --map.end(); for (auto iter = map.begin(); iter != last_valid;) { const auto old = iter++; @@ -27,12 +27,12 @@ ConcatenatedVfsFile::ConcatenatedVfsFile(std::vector<VirtualFile> files_, std::s : name(std::move(name)) { std::size_t next_offset = 0; for (const auto& file : files_) { - files[next_offset] = file; + files.emplace(next_offset, file); next_offset += file->GetSize(); } } -ConcatenatedVfsFile::ConcatenatedVfsFile(std::map<u64, VirtualFile> files_, std::string name) +ConcatenatedVfsFile::ConcatenatedVfsFile(std::multimap<u64, VirtualFile> files_, std::string name) : files(std::move(files_)), name(std::move(name)) { ASSERT(VerifyConcatenationMapContinuity(files)); } @@ -50,7 +50,7 @@ VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(std::vector<VirtualFile> f } VirtualFile ConcatenatedVfsFile::MakeConcatenatedFile(u8 filler_byte, - std::map<u64, VirtualFile> files, + std::multimap<u64, VirtualFile> files, std::string name) { if (files.empty()) return nullptr; diff --git a/src/core/file_sys/vfs_concat.h b/src/core/file_sys/vfs_concat.h index c90f9d5d1..7a26343c0 100644 --- a/src/core/file_sys/vfs_concat.h +++ b/src/core/file_sys/vfs_concat.h @@ -15,7 +15,7 @@ namespace FileSys { // read-only. class ConcatenatedVfsFile : public VfsFile { ConcatenatedVfsFile(std::vector<VirtualFile> files, std::string name); - ConcatenatedVfsFile(std::map<u64, VirtualFile> files, std::string name); + ConcatenatedVfsFile(std::multimap<u64, VirtualFile> files, std::string name); public: ~ConcatenatedVfsFile() override; @@ -25,7 +25,7 @@ public: /// Convenience function that turns a map of offsets to files into a concatenated file, filling /// gaps with a given filler byte. - static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::map<u64, VirtualFile> files, + static VirtualFile MakeConcatenatedFile(u8 filler_byte, std::multimap<u64, VirtualFile> files, std::string name); std::string GetName() const override; @@ -40,7 +40,7 @@ public: private: // Maps starting offset to file -- more efficient. - std::map<u64, VirtualFile> files; + std::multimap<u64, VirtualFile> files; std::string name; }; diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index cadc03805..c66124998 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -55,6 +55,10 @@ std::string VfsDirectoryServiceWrapper::GetName() const { ResultCode VfsDirectoryServiceWrapper::CreateFile(const std::string& path_, u64 size) const { std::string path(FileUtil::SanitizePath(path_)); auto dir = GetDirectoryRelativeWrapped(backing, FileUtil::GetParentPath(path)); + // dir can be nullptr if path contains subdirectories, create those prior to creating the file. + if (dir == nullptr) { + dir = backing->CreateSubdirectory(FileUtil::GetParentPath(path)); + } auto file = dir->CreateFile(FileUtil::GetFilename(path)); if (file == nullptr) { // TODO(DarkLordZach): Find a better error code for this diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp index 4a1d1182e..4730070cb 100644 --- a/src/core/hle/service/mii/manager.cpp +++ b/src/core/hle/service/mii/manager.cpp @@ -47,66 +47,67 @@ std::array<T, DestArraySize> ResizeArray(const std::array<T, SourceArraySize>& i MiiInfo ConvertStoreDataToInfo(const MiiStoreData& data) { MiiStoreBitFields bf; std::memcpy(&bf, data.data.data.data(), sizeof(MiiStoreBitFields)); - MiiInfo info{}; - info.name = ResizeArray<char16_t, 10, 11>(data.data.name); - info.uuid = data.data.uuid; - info.font_region = static_cast<u8>(bf.font_region.Value()); - info.favorite_color = static_cast<u8>(bf.favorite_color.Value()); - info.gender = static_cast<u8>(bf.gender.Value()); - info.height = static_cast<u8>(bf.height.Value()); - info.build = static_cast<u8>(bf.build.Value()); - info.type = static_cast<u8>(bf.type.Value()); - info.region_move = static_cast<u8>(bf.region_move.Value()); - info.faceline_type = static_cast<u8>(bf.faceline_type.Value()); - info.faceline_color = static_cast<u8>(bf.faceline_color.Value()); - info.faceline_wrinkle = static_cast<u8>(bf.faceline_wrinkle.Value()); - info.faceline_make = static_cast<u8>(bf.faceline_makeup.Value()); - info.hair_type = static_cast<u8>(bf.hair_type.Value()); - info.hair_color = static_cast<u8>(bf.hair_color.Value()); - info.hair_flip = static_cast<u8>(bf.hair_flip.Value()); - info.eye_type = static_cast<u8>(bf.eye_type.Value()); - info.eye_color = static_cast<u8>(bf.eye_color.Value()); - info.eye_scale = static_cast<u8>(bf.eye_scale.Value()); - info.eye_aspect = static_cast<u8>(bf.eye_aspect.Value()); - info.eye_rotate = static_cast<u8>(bf.eye_rotate.Value()); - info.eye_x = static_cast<u8>(bf.eye_x.Value()); - info.eye_y = static_cast<u8>(bf.eye_y.Value()); - info.eyebrow_type = static_cast<u8>(bf.eyebrow_type.Value()); - info.eyebrow_color = static_cast<u8>(bf.eyebrow_color.Value()); - info.eyebrow_scale = static_cast<u8>(bf.eyebrow_scale.Value()); - info.eyebrow_aspect = static_cast<u8>(bf.eyebrow_aspect.Value()); - info.eyebrow_rotate = static_cast<u8>(bf.eyebrow_rotate.Value()); - info.eyebrow_x = static_cast<u8>(bf.eyebrow_x.Value()); - info.eyebrow_y = static_cast<u8>(bf.eyebrow_y.Value() + 3); - info.nose_type = static_cast<u8>(bf.nose_type.Value()); - info.nose_scale = static_cast<u8>(bf.nose_scale.Value()); - info.nose_y = static_cast<u8>(bf.nose_y.Value()); - info.mouth_type = static_cast<u8>(bf.mouth_type.Value()); - info.mouth_color = static_cast<u8>(bf.mouth_color.Value()); - info.mouth_scale = static_cast<u8>(bf.mouth_scale.Value()); - info.mouth_aspect = static_cast<u8>(bf.mouth_aspect.Value()); - info.mouth_y = static_cast<u8>(bf.mouth_y.Value()); - info.beard_color = static_cast<u8>(bf.beard_color.Value()); - info.beard_type = static_cast<u8>(bf.beard_type.Value()); - info.mustache_type = static_cast<u8>(bf.mustache_type.Value()); - info.mustache_scale = static_cast<u8>(bf.mustache_scale.Value()); - info.mustache_y = static_cast<u8>(bf.mustache_y.Value()); - info.glasses_type = static_cast<u8>(bf.glasses_type.Value()); - info.glasses_color = static_cast<u8>(bf.glasses_color.Value()); - info.glasses_scale = static_cast<u8>(bf.glasses_scale.Value()); - info.glasses_y = static_cast<u8>(bf.glasses_y.Value()); - info.mole_type = static_cast<u8>(bf.mole_type.Value()); - info.mole_scale = static_cast<u8>(bf.mole_scale.Value()); - info.mole_x = static_cast<u8>(bf.mole_x.Value()); - info.mole_y = static_cast<u8>(bf.mole_y.Value()); - return info; + + return { + .uuid = data.data.uuid, + .name = ResizeArray<char16_t, 10, 11>(data.data.name), + .font_region = static_cast<u8>(bf.font_region.Value()), + .favorite_color = static_cast<u8>(bf.favorite_color.Value()), + .gender = static_cast<u8>(bf.gender.Value()), + .height = static_cast<u8>(bf.height.Value()), + .build = static_cast<u8>(bf.build.Value()), + .type = static_cast<u8>(bf.type.Value()), + .region_move = static_cast<u8>(bf.region_move.Value()), + .faceline_type = static_cast<u8>(bf.faceline_type.Value()), + .faceline_color = static_cast<u8>(bf.faceline_color.Value()), + .faceline_wrinkle = static_cast<u8>(bf.faceline_wrinkle.Value()), + .faceline_make = static_cast<u8>(bf.faceline_makeup.Value()), + .hair_type = static_cast<u8>(bf.hair_type.Value()), + .hair_color = static_cast<u8>(bf.hair_color.Value()), + .hair_flip = static_cast<u8>(bf.hair_flip.Value()), + .eye_type = static_cast<u8>(bf.eye_type.Value()), + .eye_color = static_cast<u8>(bf.eye_color.Value()), + .eye_scale = static_cast<u8>(bf.eye_scale.Value()), + .eye_aspect = static_cast<u8>(bf.eye_aspect.Value()), + .eye_rotate = static_cast<u8>(bf.eye_rotate.Value()), + .eye_x = static_cast<u8>(bf.eye_x.Value()), + .eye_y = static_cast<u8>(bf.eye_y.Value()), + .eyebrow_type = static_cast<u8>(bf.eyebrow_type.Value()), + .eyebrow_color = static_cast<u8>(bf.eyebrow_color.Value()), + .eyebrow_scale = static_cast<u8>(bf.eyebrow_scale.Value()), + .eyebrow_aspect = static_cast<u8>(bf.eyebrow_aspect.Value()), + .eyebrow_rotate = static_cast<u8>(bf.eyebrow_rotate.Value()), + .eyebrow_x = static_cast<u8>(bf.eyebrow_x.Value()), + .eyebrow_y = static_cast<u8>(bf.eyebrow_y.Value() + 3), + .nose_type = static_cast<u8>(bf.nose_type.Value()), + .nose_scale = static_cast<u8>(bf.nose_scale.Value()), + .nose_y = static_cast<u8>(bf.nose_y.Value()), + .mouth_type = static_cast<u8>(bf.mouth_type.Value()), + .mouth_color = static_cast<u8>(bf.mouth_color.Value()), + .mouth_scale = static_cast<u8>(bf.mouth_scale.Value()), + .mouth_aspect = static_cast<u8>(bf.mouth_aspect.Value()), + .mouth_y = static_cast<u8>(bf.mouth_y.Value()), + .beard_color = static_cast<u8>(bf.beard_color.Value()), + .beard_type = static_cast<u8>(bf.beard_type.Value()), + .mustache_type = static_cast<u8>(bf.mustache_type.Value()), + .mustache_scale = static_cast<u8>(bf.mustache_scale.Value()), + .mustache_y = static_cast<u8>(bf.mustache_y.Value()), + .glasses_type = static_cast<u8>(bf.glasses_type.Value()), + .glasses_color = static_cast<u8>(bf.glasses_color.Value()), + .glasses_scale = static_cast<u8>(bf.glasses_scale.Value()), + .glasses_y = static_cast<u8>(bf.glasses_y.Value()), + .mole_type = static_cast<u8>(bf.mole_type.Value()), + .mole_scale = static_cast<u8>(bf.mole_scale.Value()), + .mole_x = static_cast<u8>(bf.mole_x.Value()), + .mole_y = static_cast<u8>(bf.mole_y.Value()), + }; } u16 GenerateCrc16(const void* data, std::size_t size) { s32 crc{}; - for (int i = 0; i < size; i++) { - crc ^= reinterpret_cast<const u8*>(data)[i] << 8; - for (int j = 0; j < 8; j++) { + for (std::size_t i = 0; i < size; i++) { + crc ^= static_cast<const u8*>(data)[i] << 8; + for (std::size_t j = 0; j < 8; j++) { crc <<= 1; if ((crc & 0x10000) != 0) { crc = (crc ^ 0x1021) & 0xFFFF; diff --git a/src/core/memory/dmnt_cheat_vm.cpp b/src/core/memory/dmnt_cheat_vm.cpp index fb9f36bfd..2e7da23fe 100644 --- a/src/core/memory/dmnt_cheat_vm.cpp +++ b/src/core/memory/dmnt_cheat_vm.cpp @@ -190,6 +190,15 @@ void DmntCheatVm::LogOpcode(const CheatVmOpcode& opcode) { callbacks->CommandLog( fmt::format("Act[{:02X}]: {:d}", i, save_restore_regmask->should_operate[i])); } + } else if (auto rw_static_reg = std::get_if<ReadWriteStaticRegisterOpcode>(&opcode.opcode)) { + callbacks->CommandLog("Opcode: Read/Write Static Register"); + if (rw_static_reg->static_idx < NumReadableStaticRegisters) { + callbacks->CommandLog("Op Type: ReadStaticRegister"); + } else { + callbacks->CommandLog("Op Type: WriteStaticRegister"); + } + callbacks->CommandLog(fmt::format("Reg Idx {:X}", rw_static_reg->idx)); + callbacks->CommandLog(fmt::format("Stc Idx {:X}", rw_static_reg->static_idx)); } else if (auto debug_log = std::get_if<DebugLogOpcode>(&opcode.opcode)) { callbacks->CommandLog("Opcode: Debug Log"); callbacks->CommandLog(fmt::format("Bit Width: {:X}", debug_log->bit_width)); @@ -544,6 +553,16 @@ bool DmntCheatVm::DecodeNextOpcode(CheatVmOpcode& out) { } opcode.opcode = save_restore_regmask; } break; + case CheatVmOpcodeType::ReadWriteStaticRegister: { + ReadWriteStaticRegisterOpcode rw_static_reg{}; + // C3000XXx + // C3 = opcode 0xC3. + // XX = static register index. + // x = register index. + rw_static_reg.static_idx = ((first_dword >> 4) & 0xFF); + rw_static_reg.idx = (first_dword & 0xF); + opcode.opcode = rw_static_reg; + } break; case CheatVmOpcodeType::DebugLog: { DebugLogOpcode debug_log{}; // FFFTIX## @@ -667,6 +686,7 @@ void DmntCheatVm::ResetState() { registers.fill(0); saved_values.fill(0); loop_tops.fill(0); + static_registers.fill(0); instruction_ptr = 0; condition_depth = 0; decode_success = true; @@ -1153,6 +1173,15 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) { } } } + } else if (auto rw_static_reg = + std::get_if<ReadWriteStaticRegisterOpcode>(&cur_opcode.opcode)) { + if (rw_static_reg->static_idx < NumReadableStaticRegisters) { + // Load a register with a static register. + registers[rw_static_reg->idx] = static_registers[rw_static_reg->static_idx]; + } else { + // Store a register to a static register. + static_registers[rw_static_reg->static_idx] = registers[rw_static_reg->idx]; + } } else if (auto debug_log = std::get_if<DebugLogOpcode>(&cur_opcode.opcode)) { // Read value from memory. u64 log_value = 0; diff --git a/src/core/memory/dmnt_cheat_vm.h b/src/core/memory/dmnt_cheat_vm.h index 8351fd798..21b86b72c 100644 --- a/src/core/memory/dmnt_cheat_vm.h +++ b/src/core/memory/dmnt_cheat_vm.h @@ -56,6 +56,7 @@ enum class CheatVmOpcodeType : u32 { BeginRegisterConditionalBlock = 0xC0, SaveRestoreRegister = 0xC1, SaveRestoreRegisterMask = 0xC2, + ReadWriteStaticRegister = 0xC3, // This is a meta entry, and not a real opcode. // This is to facilitate multi-nybble instruction decoding. @@ -237,6 +238,11 @@ struct SaveRestoreRegisterMaskOpcode { std::array<bool, 0x10> should_operate{}; }; +struct ReadWriteStaticRegisterOpcode { + u32 static_idx{}; + u32 idx{}; +}; + struct DebugLogOpcode { u32 bit_width{}; u32 log_id{}; @@ -259,7 +265,8 @@ struct CheatVmOpcode { PerformArithmeticStaticOpcode, BeginKeypressConditionalOpcode, PerformArithmeticRegisterOpcode, StoreRegisterToAddressOpcode, BeginRegisterConditionalOpcode, SaveRestoreRegisterOpcode, - SaveRestoreRegisterMaskOpcode, DebugLogOpcode, UnrecognizedInstruction> + SaveRestoreRegisterMaskOpcode, ReadWriteStaticRegisterOpcode, DebugLogOpcode, + UnrecognizedInstruction> opcode{}; }; @@ -281,6 +288,10 @@ public: static constexpr std::size_t MaximumProgramOpcodeCount = 0x400; static constexpr std::size_t NumRegisters = 0x10; + static constexpr std::size_t NumReadableStaticRegisters = 0x80; + static constexpr std::size_t NumWritableStaticRegisters = 0x80; + static constexpr std::size_t NumStaticRegisters = + NumReadableStaticRegisters + NumWritableStaticRegisters; explicit DmntCheatVm(std::unique_ptr<Callbacks> callbacks); ~DmntCheatVm(); @@ -302,6 +313,7 @@ private: std::array<u32, MaximumProgramOpcodeCount> program{}; std::array<u64, NumRegisters> registers{}; std::array<u64, NumRegisters> saved_values{}; + std::array<u64, NumStaticRegisters> static_registers{}; std::array<std::size_t, NumRegisters> loop_tops{}; bool DecodeNextOpcode(CheatVmOpcode& out); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index e8a6f2a6e..44252dd81 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -115,6 +115,7 @@ void LogSettings() { values.use_asynchronous_gpu_emulation.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); + log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); log_setting("Audio_OutputEngine", values.sink_id); log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -170,6 +171,7 @@ void RestoreGlobalState() { values.use_asynchronous_gpu_emulation.SetGlobal(true); values.use_vsync.SetGlobal(true); values.use_assembly_shaders.SetGlobal(true); + values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); values.force_30fps_mode.SetGlobal(true); values.bg_red.SetGlobal(true); diff --git a/src/core/settings.h b/src/core/settings.h index a64debd25..386233fdf 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -434,6 +434,7 @@ struct Values { Setting<bool> use_asynchronous_gpu_emulation; Setting<bool> use_vsync; Setting<bool> use_assembly_shaders; + Setting<bool> use_asynchronous_shaders; Setting<bool> force_30fps_mode; Setting<bool> use_fast_gpu_time; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 78915e6db..5a30c75da 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -207,6 +207,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders.GetValue()); + AddField(field_type, "Renderer_UseAsynchronousShaders", + Settings::values.use_asynchronous_shaders.GetValue()); AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 21c46a567..3cd896a0f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -98,6 +98,8 @@ add_library(video_core STATIC sampler_cache.cpp sampler_cache.h shader_cache.h + shader_notify.cpp + shader_notify.h shader/decode/arithmetic.cpp shader/decode/arithmetic_immediate.cpp shader/decode/bfe.cpp @@ -128,6 +130,8 @@ add_library(video_core STATIC shader/decode/other.cpp shader/ast.cpp shader/ast.h + shader/async_shaders.cpp + shader/async_shaders.h shader/compiler_settings.cpp shader/compiler_settings.h shader/control_flow.cpp diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 758bfe148..8e19c3373 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -20,6 +20,7 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/shader_notify.h" #include "video_core/video_core.h" namespace Tegra { @@ -36,6 +37,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); + shader_notify = std::make_unique<VideoCore::ShaderNotify>(); } GPU::~GPU() = default; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2c42483bd..8d04d9fd9 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -33,6 +33,7 @@ class System; namespace VideoCore { class RendererBase; +class ShaderNotify; } // namespace VideoCore namespace Tegra { @@ -207,6 +208,14 @@ public: return *renderer; } + VideoCore::ShaderNotify& ShaderNotify() { + return *shader_notify; + } + + const VideoCore::ShaderNotify& ShaderNotify() const { + return *shader_notify; + } + // Waits for the GPU to finish working virtual void WaitIdle() const = 0; @@ -347,6 +356,8 @@ private: std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine std::unique_ptr<Engines::KeplerMemory> kepler_memory; + /// Shader build notifier + std::unique_ptr<VideoCore::ShaderNotify> shader_notify; std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c1f20f0ab..630acb73b 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -233,6 +233,8 @@ Device::Device() GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); + LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e1d811966..94d38d7d1 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -104,6 +104,10 @@ public: return use_assembly_shaders; } + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -127,6 +131,7 @@ private: bool has_fast_buffer_sub_data{}; bool has_nv_viewport_array2{}; bool use_assembly_shaders{}; + bool use_asynchronous_shaders{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e960a0ef1..c3fad563c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -149,7 +149,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, - screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { + screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, + async_shaders{emu_window} { CheckExtensions(); unified_uniform_buffer.Create(); @@ -162,6 +163,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind nullptr, 0); } } + + if (device.UseAsynchronousShaders()) { + // Max worker threads we should allow + constexpr auto MAX_THREADS = 2u; + // Amount of threads we should reserve for other parts of yuzu + constexpr auto RESERVED_THREADS = 6u; + // Get the amount of threads we can use(this can return zero) + const auto cpu_thread_count = + std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); + // Deduce how many "extra" threads we have to use. + const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1u, max_threads_unused); + // Don't use more than MAX_THREADS + const auto worker_count = std::min(max_worker_count, MAX_THREADS); + async_shaders.AllocateWorkers(worker_count); + } } RasterizerOpenGL::~RasterizerOpenGL() { @@ -336,7 +354,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - Shader* const shader = shader_cache.GetStageProgram(program); + Shader* shader = shader_cache.GetStageProgram(program, async_shaders); if (device.UseAssemblyShaders()) { // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this @@ -353,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupDrawTextures(stage, shader); SetupDrawImages(stage, shader); - const GLuint program_handle = shader->GetHandle(); + const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f082592f..a95646936 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -33,6 +33,7 @@ #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" namespace Core { @@ -91,6 +92,14 @@ public: return num_queued_commands > 0; } + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); @@ -242,6 +251,7 @@ private: ScreenInfo& screen_info; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCommon::Shader::AsyncShaders async_shaders; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f8b322227..b05cb641c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -177,6 +177,12 @@ public: Release(); } + OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c6a3bf3a1..f469ed656 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -31,6 +31,7 @@ #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace OpenGL { @@ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { return registry; } +std::unordered_set<GLenum> GetSupportedFormats() { + GLint num_formats; + glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); + + std::vector<GLint> formats(num_formats); + glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); + + std::unordered_set<GLenum> supported_formats; + for (const GLint format : formats) { + supported_formats.insert(static_cast<GLenum>(format)); + } + return supported_formats; +} + +} // Anonymous namespace + ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, - const ShaderIR& ir, const Registry& registry, - bool hint_retrievable = false) { + const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { const std::string shader_id = MakeShaderID(unique_identifier, shader_type); LOG_INFO(Render_OpenGL, "{}", shader_id); @@ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u return program; } -std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats; - glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); - - std::vector<GLint> formats(num_formats); - glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); - - std::unordered_set<GLenum> supported_formats; - for (const GLint format : formats) { - supported_formats.insert(static_cast<GLenum>(format)); - } - return supported_formats; -} - -} // Anonymous namespace - Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, - ProgramSharedPtr program_) - : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { + ProgramSharedPtr program_, bool is_built) + : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, + is_built(is_built) { handle = program->assembly_program.handle; if (handle == 0) { handle = program->source_program.handle; } - ASSERT(handle != 0); + if (is_built) { + ASSERT(handle != 0); + } } Shader::~Shader() = default; @@ -214,42 +217,82 @@ GLuint Shader::GetHandle() const { return handle; } -std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode code, ProgramCode code_b) { +bool Shader::IsBuilt() const { + return is_built; +} + +void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { + program->source_program = std::move(new_program); + handle = program->source_program.handle; + is_built = true; +} + +void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { + program->assembly_program = std::move(new_program); + handle = program->assembly_program.handle; + is_built = true; +} + +std::unique_ptr<Shader> Shader::CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, + ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { const auto shader_type = GetShaderType(program_type); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (!code_b.empty()) { - // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); - // } - auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); + if (!async_shaders.IsShaderAsync(params.system.GPU()) || + !params.device.UseAsynchronousShaders()) { + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + // TODO(Rodrigo): Handle VertexA shaders + // std::optional<ShaderIR> ir_b; + // if (!code_b.empty()) { + // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); + // } + auto program = + BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + ShaderDiskCacheEntry entry; + entry.type = shader_type; + entry.code = std::move(code); + entry.code_b = std::move(code_b); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + gpu.ShaderNotify().MarkShaderComplete(); + + return std::unique_ptr<Shader>(new Shader(std::move(registry), + MakeEntries(params.device, ir, shader_type), + std::move(program), true)); + } else { + // Required for entries + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + auto entries = MakeEntries(params.device, ir, shader_type); - ShaderDiskCacheEntry entry; - entry.type = shader_type; - entry.code = std::move(code); - entry.code_b = std::move(code_b); - entry.unique_identifier = params.unique_identifier; - entry.bound_buffer = registry->GetBoundBuffer(); - entry.graphics_info = registry->GetGraphicsInfo(); - entry.keys = registry->GetKeys(); - entry.bound_samplers = registry->GetBoundSamplers(); - entry.bindless_samplers = registry->GetBindlessSamplers(); - params.disk_cache.SaveEntry(std::move(entry)); + async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, + std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, + COMPILER_SETTINGS, *registry, cpu_addr); - return std::unique_ptr<Shader>(new Shader( - std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); + auto program = std::make_shared<ProgramHandle>(); + return std::unique_ptr<Shader>( + new Shader(std::move(registry), std::move(entries), std::move(program), false)); + } } std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto& engine = params.system.GPU().KeplerCompute(); + auto& gpu = params.system.GPU(); + gpu.ShaderNotify().MarkSharderBuilding(); + + auto& engine = gpu.KeplerCompute(); auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const u64 uid = params.unique_identifier; @@ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); + gpu.ShaderNotify().MarkShaderComplete(); + return std::unique_ptr<Shader>(new Shader(std::move(registry), MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); @@ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( return program; } -Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { +Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders) { if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { - return last_shaders[static_cast<std::size_t>(program)]; + auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; + if (last_shader->IsBuilt()) { + return last_shader; + } } auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr address{GetShaderAddress(system, program)}; + if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { + auto completed_work = async_shaders.GetCompletedWork(); + for (auto& work : completed_work) { + Shader* shader = TryGet(work.cpu_address); + auto& gpu = system.GPU(); + gpu.ShaderNotify().MarkShaderComplete(); + if (shader == nullptr) { + continue; + } + using namespace VideoCommon::Shader; + if (work.backend == AsyncShaders::Backend::OpenGL) { + shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + shader->AsyncGLASMBuilt(std::move(work.program.glasm)); + } + + ShaderDiskCacheEntry entry; + entry.type = work.shader_type; + entry.code = std::move(work.code); + entry.code_b = std::move(work.code_b); + entry.unique_identifier = work.uid; + + auto& registry = shader->GetRegistry(); + + entry.bound_buffer = registry.GetBoundBuffer(); + entry.graphics_info = registry.GetGraphicsInfo(); + entry.keys = registry.GetKeys(); + entry.bound_samplers = registry.GetBoundSamplers(); + entry.bindless_samplers = registry.GetBindlessSamplers(); + disk_cache.SaveEntry(std::move(entry)); + } + } + // Look up shader in the cache based on address const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { @@ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { std::unique_ptr<Shader> shader; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { - shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); + shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), + async_shaders, cpu_addr.value_or(0)); } else { shader = Shader::CreateFromCache(params, found->second); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 994aaeaf2..7528ac686 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -33,6 +33,10 @@ namespace Core::Frontend { class EmuWindow; } +namespace VideoCommon::Shader { +class AsyncShaders; +} + namespace OpenGL { class Device; @@ -61,6 +65,11 @@ struct ShaderParameters { u64 unique_identifier; }; +ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, + u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + bool hint_retrievable = false); + class Shader final { public: ~Shader(); @@ -68,15 +77,28 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; + bool IsBuilt() const; + /// Gets the shader entries for the shader const ShaderEntries& GetEntries() const { return entries; } - static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, - ProgramCode program_code_b); + const VideoCommon::Shader::Registry& GetRegistry() const { + return *registry; + } + + /// Mark a OpenGL shader as built + void AsyncOpenGLBuilt(OGLProgram new_program); + + /// Mark a GLASM shader as built + void AsyncGLASMBuilt(OGLAssemblyProgram new_program); + + static std::unique_ptr<Shader> CreateStageFromMemory( + const ShaderParameters& params, Maxwell::ShaderProgram program_type, + ProgramCode program_code, ProgramCode program_code_b, + VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); + static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); @@ -85,12 +107,13 @@ public: private: explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, - ProgramSharedPtr program); + ProgramSharedPtr program, bool is_built = true); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; ProgramSharedPtr program; GLuint handle = 0; + bool is_built{}; }; class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { @@ -104,7 +127,8 @@ public: const VideoCore::DiskResourceLoadCallback& callback); /// Gets the current specified shader stage program - Shader* GetStageProgram(Maxwell::ShaderProgram program); + Shader* GetStageProgram(Maxwell::ShaderProgram program, + VideoCommon::Shader::AsyncShaders& async_shaders); /// Gets a compute kernel in the passed address Shader* GetComputeKernel(GPUVAddr code_addr); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 2be38d419..1d2f8b557 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -39,16 +39,17 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) - : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(size); - ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; + : BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(size), + .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; buffer.handle = device.GetLogical().CreateBuffer(ci); buffer.commit = memory_manager.Commit(buffer.handle, false); @@ -66,16 +67,17 @@ void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = UPLOAD_ACCESS_BARRIERS, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, barrier, {}); }); @@ -87,16 +89,17 @@ void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { const VkBuffer handle = Handle(); scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { - VkBufferMemoryBarrier barrier; - barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; - barrier.pNext = nullptr; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.buffer = handle; - barrier.offset = offset; - barrier.size = size; + const VkBufferMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = handle, + .offset = offset, + .size = size, + }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index da71e710c..182461ed9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -115,32 +115,32 @@ constexpr u8 quad_array[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { - VkDescriptorSetLayoutBinding binding; - binding.binding = 0; - binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = nullptr; - return binding; + return { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }; } VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { - VkPushConstantRange range; - range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - range.offset = 0; - range.size = static_cast<u32>(size); - return range; + return { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .offset = 0, + .size = static_cast<u32>(size), + }; } // Uint8 SPIR-V module. Generated from the "shaders/" directory. @@ -344,29 +344,33 @@ constexpr u8 QUAD_INDEXED_SPV[] = { 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { - std::array<VkDescriptorSetLayoutBinding, 2> bindings; - bindings[0].binding = 0; - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[0].pImmutableSamplers = nullptr; - bindings[1].binding = 1; - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[1].pImmutableSamplers = nullptr; - return bindings; + return {{ + { + .binding = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + { + .binding = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }, + }}; } VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { - VkDescriptorUpdateTemplateEntryKHR entry; - entry.dstBinding = 0; - entry.dstArrayElement = 0; - entry.descriptorCount = 2; - entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - entry.offset = 0; - entry.stride = sizeof(DescriptorUpdateEntry); - return entry; + return { + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 2, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .offset = 0, + .stride = sizeof(DescriptorUpdateEntry), + }; } } // Anonymous namespace @@ -376,37 +380,37 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, const u8* code) { - VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; - descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_layout_ci.pNext = nullptr; - descriptor_layout_ci.flags = 0; - descriptor_layout_ci.bindingCount = bindings.size(); - descriptor_layout_ci.pBindings = bindings.data(); - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); - - VkPipelineLayoutCreateInfo pipeline_layout_ci; - pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_ci.pNext = nullptr; - pipeline_layout_ci.flags = 0; - pipeline_layout_ci.setLayoutCount = 1; - pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); - pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); - pipeline_layout_ci.pPushConstantRanges = push_constants.data(); - layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); + descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }); + + layout = device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data(), + }); if (!templates.empty()) { - VkDescriptorUpdateTemplateCreateInfoKHR template_ci; - template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - template_ci.pNext = nullptr; - template_ci.flags = 0; - template_ci.descriptorUpdateEntryCount = templates.size(); - template_ci.pDescriptorUpdateEntries = templates.data(); - template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - template_ci.descriptorSetLayout = *descriptor_set_layout; - template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - template_ci.pipelineLayout = *layout; - template_ci.set = 0; - descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); + descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = templates.size(), + .pDescriptorUpdateEntries = templates.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = 0, + }); descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); } @@ -414,32 +418,32 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); std::memcpy(code_copy.get(), code, code_size); - VkShaderModuleCreateInfo module_ci; - module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - module_ci.pNext = nullptr; - module_ci.flags = 0; - module_ci.codeSize = code_size; - module_ci.pCode = code_copy.get(); - module = device.GetLogical().CreateShaderModule(module_ci); - - VkComputePipelineCreateInfo pipeline_ci; - pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline_ci.pNext = nullptr; - pipeline_ci.flags = 0; - pipeline_ci.layout = *layout; - pipeline_ci.basePipelineHandle = nullptr; - pipeline_ci.basePipelineIndex = 0; - - VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); + module = device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = code_copy.get(), + }); + + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }); } VKComputePass::~VKComputePass() = default; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 281bf9ac3..ed9d2991c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -43,12 +43,13 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { // TODO(Rodrigo): Maybe make individual bindings here? for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = 1; - entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = 1, + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .pImmutableSamplers = nullptr, + }); } }; add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); @@ -58,25 +59,25 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); - VkDescriptorSetLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.bindingCount = static_cast<u32>(bindings.size()); - ci.pBindings = bindings.data(); - return device.GetLogical().CreateDescriptorSetLayout(ci); + return device.GetLogical().CreateDescriptorSetLayout({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .bindingCount = static_cast<u32>(bindings.size()), + .pBindings = bindings.data(), + }); } vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { - VkPipelineLayoutCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.setLayoutCount = 1; - ci.pSetLayouts = descriptor_set_layout.address(); - ci.pushConstantRangeCount = 0; - ci.pPushConstantRanges = nullptr; - return device.GetLogical().CreatePipelineLayout(ci); + return device.GetLogical().CreatePipelineLayout({ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 0, + .pPushConstantRanges = nullptr, + }); } vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { @@ -89,59 +90,63 @@ vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplat return {}; } - VkDescriptorUpdateTemplateCreateInfoKHR ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; - ci.pNext = nullptr; - ci.flags = 0; - ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); - ci.pDescriptorUpdateEntries = template_entries.data(); - ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; - ci.descriptorSetLayout = *descriptor_set_layout; - ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - ci.pipelineLayout = *layout; - ci.set = DESCRIPTOR_SET; - return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); + return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), + .pDescriptorUpdateEntries = template_entries.data(), + .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, + .descriptorSetLayout = *descriptor_set_layout, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .pipelineLayout = *layout, + .set = DESCRIPTOR_SET, + }); } vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { device.SaveShader(code); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code.size() * sizeof(u32); - ci.pCode = code.data(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code.size() * sizeof(u32), + .pCode = code.data(), + }); } vk::Pipeline VKComputePipeline::CreatePipeline() const { - VkComputePipelineCreateInfo ci; - VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; - stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stage_ci.pNext = nullptr; - stage_ci.flags = 0; - stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stage_ci.module = *shader_module; - stage_ci.pName = "main"; - stage_ci.pSpecializationInfo = nullptr; - - VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; - subgroup_size_ci.sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; - subgroup_size_ci.pNext = nullptr; - subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; + + VkComputePipelineCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *shader_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *layout, + .basePipelineHandle = nullptr, + .basePipelineIndex = 0, + }; + + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { - stage_ci.pNext = &subgroup_size_ci; + ci.stage.pNext = &subgroup_size_ci; } - ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.layout = *layout; - ci.basePipelineHandle = nullptr; - ci.basePipelineIndex = 0; return device.GetLogical().CreateComputePipeline(ci); } diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 9259b618d..ac4a0884e 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -43,27 +43,30 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, - {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; - - VkDescriptorPoolCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - ci.maxSets = num_sets; - ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); - ci.pPoolSizes = std::data(pool_sizes); + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, + }; + + const VkDescriptorPoolCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + .maxSets = num_sets, + .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), + .pPoolSizes = std::data(pool_sizes), + }; return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); } vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count) { const std::vector layout_copies(count, layout); - VkDescriptorSetAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.descriptorPool = **active_pool; - ai.descriptorSetCount = static_cast<u32>(count); - ai.pSetLayouts = layout_copies.data(); + VkDescriptorSetAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .pNext = nullptr, + .descriptorPool = **active_pool, + .descriptorSetCount = static_cast<u32>(count), + .pSetLayouts = layout_copies.data(), + }; vk::DescriptorSets sets = active_pool->Allocate(ai); if (!sets.IsOutOfPoolMemory()) { diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 9226e591c..26379ee01 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -757,14 +757,14 @@ std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const queue_cis.reserve(unique_queue_families.size()); for (const u32 queue_family : unique_queue_families) { - queue_cis.push_back({ + auto& ci = queue_cis.emplace_back(VkDeviceQueueCreateInfo{ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .pNext = nullptr, .flags = 0, .queueFamilyIndex = queue_family, - .queueCount = 1, - .pQueuePriorities = &QUEUE_PRIORITY, }); + ci.queueCount = 1; + ci.pQueuePriorities = &QUEUE_PRIORITY; } return queue_cis; diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 9bceb3861..1c418ea17 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -102,21 +102,29 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num void VKImage::CreatePresentView() { // Image type has to be 2D to be presented. - VkImageViewCreateInfo image_view_ci; - image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - image_view_ci.pNext = nullptr; - image_view_ci.flags = 0; - image_view_ci.image = *image; - image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - image_view_ci.format = format; - image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - image_view_ci.subresourceRange.aspectMask = aspect_mask; - image_view_ci.subresourceRange.baseMipLevel = 0; - image_view_ci.subresourceRange.levelCount = 1; - image_view_ci.subresourceRange.baseArrayLayer = 0; - image_view_ci.subresourceRange.layerCount = 1; - present_view = device.GetLogical().CreateImageView(image_view_ci); + present_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = *image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }); } VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index b4c650a63..24c8960ac 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -178,13 +178,12 @@ bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 t }(); // Try to allocate found type. - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = size; - memory_ai.memoryTypeIndex = type; - - vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = size, + .memoryTypeIndex = type, + }); if (!memory) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); return false; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3da835324..42b3a744c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -88,12 +88,13 @@ void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& bindi // Combined image samplers can be arrayed. count = container[i].size; } - VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); - entry.binding = binding++; - entry.descriptorType = descriptor_type; - entry.descriptorCount = count; - entry.stageFlags = stage_flags; - entry.pImmutableSamplers = nullptr; + bindings.push_back({ + .binding = binding++, + .descriptorType = descriptor_type, + .descriptorCount = count, + .stageFlags = stage_flags, + .pImmutableSamplers = nullptr, + }); } } @@ -259,10 +260,10 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } } - Specialization specialization; - specialization.workgroup_size = key.workgroup_size; - specialization.shared_memory_size = key.shared_memory_size; - + const Specialization specialization{ + .workgroup_size = key.workgroup_size, + .shared_memory_size = key.shared_memory_size, + }; const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, shader->GetRegistry(), specialization), shader->GetEntries()}; @@ -370,13 +371,14 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { for (u32 i = 0; i < count; ++i) { const u32 num_samplers = container[i].size; - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = num_samplers; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = num_samplers, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); ++binding; offset += num_samplers * entry_size; @@ -389,22 +391,24 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. // Note: Fixed in driver Windows 443.24, Linux 440.66.15 for (u32 i = 0; i < count; ++i) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding + i; - entry.dstArrayElement = 0; - entry.descriptorCount = 1; - entry.descriptorType = descriptor_type; - entry.offset = static_cast<std::size_t>(offset + i * entry_size); - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding + i, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = descriptor_type, + .offset = static_cast<std::size_t>(offset + i * entry_size), + .stride = entry_size, + }); } } else if (count > 0) { - VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); - entry.dstBinding = binding; - entry.dstArrayElement = 0; - entry.descriptorCount = count; - entry.descriptorType = descriptor_type; - entry.offset = offset; - entry.stride = entry_size; + template_entries.push_back({ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = count, + .descriptorType = descriptor_type, + .offset = offset, + .stride = entry_size, + }); } offset += count * entry_size; binding += count; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index bc91c48cc..6cd63d090 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -47,14 +47,14 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - VkQueryPoolCreateInfo query_pool_ci; - query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - query_pool_ci.pNext = nullptr; - query_pool_ci.flags = 0; - query_pool_ci.queryType = GetTarget(type); - query_pool_ci.queryCount = static_cast<u32>(end - begin); - query_pool_ci.pipelineStatistics = 0; - pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); + pools.push_back(device->GetLogical().CreateQueryPool({ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .queryType = GetTarget(type), + .queryCount = static_cast<u32>(end - begin), + .pipelineStatistics = 0, + })); } void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7625871c2..31e44aa2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -64,20 +64,22 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si const auto& src = regs.viewport_transform[index]; const float width = src.scale_x * 2.0f; const float height = src.scale_y * 2.0f; + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - VkViewport viewport; - viewport.x = src.translate_x - src.scale_x; - viewport.y = src.translate_y - src.scale_y; - viewport.width = width != 0.0f ? width : 1.0f; - viewport.height = height != 0.0f ? height : 1.0f; + VkViewport viewport{ + .x = src.translate_x - src.scale_x, + .y = src.translate_y - src.scale_y, + .width = width != 0.0f ? width : 1.0f, + .height = height != 0.0f ? height : 1.0f, + .minDepth = src.translate_z - src.scale_z * reduce_z, + .maxDepth = src.translate_z + src.scale_z, + }; - const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; - viewport.minDepth = src.translate_z - src.scale_z * reduce_z; - viewport.maxDepth = src.translate_z + src.scale_z; if (!device.IsExtDepthRangeUnrestrictedSupported()) { viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); } + return viewport; } @@ -508,10 +510,11 @@ void RasterizerVulkan::Clear() { const u32 color_attachment = regs.clear_buffers.RT; scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { - VkClearAttachment attachment; - attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - attachment.colorAttachment = color_attachment; - attachment.clearValue = clear_value; + const VkClearAttachment attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = color_attachment, + .clearValue = clear_value, + }; cmdbuf.ClearAttachments(attachment, clear_rect); }); } @@ -551,13 +554,16 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { query_cache.UpdateCounters(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - ComputePipelineCacheKey key; - key.shader = code_addr; - key.shared_memory_size = launch_desc.shared_alloc; - key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z}; - - auto& pipeline = pipeline_cache.GetComputePipeline(key); + auto& pipeline = pipeline_cache.GetComputePipeline({ + .shader = code_addr, + .shared_memory_size = launch_desc.shared_alloc, + .workgroup_size = + { + launch_desc.block_dim_x, + launch_desc.block_dim_y, + launch_desc.block_dim_z, + }, + }); // Compute dispatches can't be executed inside a renderpass scheduler.RequestOutsideRenderPassOperationContext(); @@ -841,17 +847,17 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - VkFramebufferCreateInfo framebuffer_ci; - framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - framebuffer_ci.pNext = nullptr; - framebuffer_ci.flags = 0; - framebuffer_ci.renderPass = key.renderpass; - framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); - framebuffer_ci.pAttachments = key.views.data(); - framebuffer_ci.width = key.width; - framebuffer_ci.height = key.height; - framebuffer_ci.layers = key.layers; - framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); + framebuffer = device.GetLogical().CreateFramebuffer({ + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .renderPass = key.renderpass, + .attachmentCount = static_cast<u32>(key.views.size()), + .pAttachments = key.views.data(), + .width = key.width, + .height = key.height, + .layers = key.layers, + }); } return {*framebuffer, VkExtent2D{key.width, key.height}}; @@ -1553,17 +1559,17 @@ VkBuffer RasterizerVulkan::DefaultBuffer() { return *default_buffer; } - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = DEFAULT_BUFFER_SIZE; - ci.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - default_buffer = device.GetLogical().CreateBuffer(ci); + default_buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = DEFAULT_BUFFER_SIZE, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); default_buffer_commit = memory_manager.Commit(default_buffer, false); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 3f71d005e..80284cf92 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -39,10 +39,14 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { using namespace VideoCore::Surface; + const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); + std::vector<VkAttachmentDescription> descriptors; + descriptors.reserve(num_attachments); + std::vector<VkAttachmentReference> color_references; + color_references.reserve(num_attachments); - const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments); for (std::size_t rt = 0; rt < num_attachments; ++rt) { const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]); const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); @@ -54,20 +58,22 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - descriptor.initialLayout = color_layout; - descriptor.finalLayout = color_layout; - - VkAttachmentReference& reference = color_references.emplace_back(); - reference.attachment = static_cast<u32>(rt); - reference.layout = color_layout; + descriptors.push_back({ + .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = color_layout, + .finalLayout = color_layout, + }); + + color_references.push_back({ + .attachment = static_cast<u32>(rt), + .layout = color_layout, + }); } VkAttachmentReference zeta_attachment_ref; @@ -82,32 +88,36 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param const VkImageLayout zeta_layout = params.zeta_texception != 0 ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; - VkAttachmentDescription& descriptor = descriptors.emplace_back(); - descriptor.flags = 0; - descriptor.format = format.format; - descriptor.samples = VK_SAMPLE_COUNT_1_BIT; - descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; - descriptor.initialLayout = zeta_layout; - descriptor.finalLayout = zeta_layout; - - zeta_attachment_ref.attachment = static_cast<u32>(num_attachments); - zeta_attachment_ref.layout = zeta_layout; + descriptors.push_back({ + .flags = 0, + .format = format.format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = zeta_layout, + .finalLayout = zeta_layout, + }); + + zeta_attachment_ref = { + .attachment = static_cast<u32>(num_attachments), + .layout = zeta_layout, + }; } - VkSubpassDescription subpass_description; - subpass_description.flags = 0; - subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - subpass_description.inputAttachmentCount = 0; - subpass_description.pInputAttachments = nullptr; - subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); - subpass_description.pColorAttachments = color_references.data(); - subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr; - subpass_description.preserveAttachmentCount = 0; - subpass_description.pPreserveAttachments = nullptr; + const VkSubpassDescription subpass_description{ + .flags = 0, + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .inputAttachmentCount = 0, + .pInputAttachments = nullptr, + .colorAttachmentCount = static_cast<u32>(color_references.size()), + .pColorAttachments = color_references.data(), + .pResolveAttachments = nullptr, + .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr, + .preserveAttachmentCount = 0, + .pPreserveAttachments = nullptr, + }; VkAccessFlags access = 0; VkPipelineStageFlags stage = 0; @@ -122,26 +132,27 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; } - VkSubpassDependency subpass_dependency; - subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; - subpass_dependency.dstSubpass = 0; - subpass_dependency.srcStageMask = stage; - subpass_dependency.dstStageMask = stage; - subpass_dependency.srcAccessMask = 0; - subpass_dependency.dstAccessMask = access; - subpass_dependency.dependencyFlags = 0; - - VkRenderPassCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.attachmentCount = static_cast<u32>(descriptors.size()); - ci.pAttachments = descriptors.data(); - ci.subpassCount = 1; - ci.pSubpasses = &subpass_description; - ci.dependencyCount = 1; - ci.pDependencies = &subpass_dependency; - return device.GetLogical().CreateRenderPass(ci); + const VkSubpassDependency subpass_dependency{ + .srcSubpass = VK_SUBPASS_EXTERNAL, + .dstSubpass = 0, + .srcStageMask = stage, + .dstStageMask = stage, + .srcAccessMask = 0, + .dstAccessMask = access, + .dependencyFlags = 0, + }; + + return device.GetLogical().CreateRenderPass({ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .attachmentCount = static_cast<u32>(descriptors.size()), + .pAttachments = descriptors.data(), + .subpassCount = 1, + .pSubpasses = &subpass_description, + .dependencyCount = 1, + .pDependencies = &subpass_dependency, + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index dc06f545a..f19330a36 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp @@ -18,33 +18,32 @@ namespace { constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; constexpr std::size_t FENCES_GROW_STEP = 0x40; -VkFenceCreateInfo BuildFenceCreateInfo() { - VkFenceCreateInfo fence_ci; - fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fence_ci.pNext = nullptr; - fence_ci.flags = 0; - return fence_ci; +constexpr VkFenceCreateInfo BuildFenceCreateInfo() { + return { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; } } // Anonymous namespace class CommandBufferPool final : public VKFencedPool { public: - CommandBufferPool(const VKDevice& device) + explicit CommandBufferPool(const VKDevice& device) : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} void Allocate(std::size_t begin, std::size_t end) override { // Command buffers are going to be commited, recorded, executed every single usage cycle. // They are also going to be reseted when commited. - VkCommandPoolCreateInfo command_pool_ci; - command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - command_pool_ci.pNext = nullptr; - command_pool_ci.flags = - VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); - Pool& pool = pools.emplace_back(); - pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); + pool.handle = device.GetLogical().CreateCommandPool({ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = device.GetGraphicsFamily(), + }); pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); } diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 616eacc36..2d5460776 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -44,32 +44,35 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c const bool arbitrary_borders = device.IsExtCustomBorderColorSupported(); const std::array color = tsc.GetBorderColor(); - VkSamplerCustomBorderColorCreateInfoEXT border; - border.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT; - border.pNext = nullptr; - border.format = VK_FORMAT_UNDEFINED; + VkSamplerCustomBorderColorCreateInfoEXT border{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .pNext = nullptr, + .format = VK_FORMAT_UNDEFINED, + }; std::memcpy(&border.customBorderColor, color.data(), sizeof(color)); - VkSamplerCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - ci.pNext = arbitrary_borders ? &border : nullptr; - ci.flags = 0; - ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); - ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter); - ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); - ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); - ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); - ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); - ci.mipLodBias = tsc.GetLodBias(); - ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; - ci.maxAnisotropy = tsc.GetMaxAnisotropy(); - ci.compareEnable = tsc.depth_compare_enabled; - ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); - ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(); - ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(); - ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); - ci.unnormalizedCoordinates = VK_FALSE; - return device.GetLogical().CreateSampler(ci); + return device.GetLogical().CreateSampler({ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = arbitrary_borders ? &border : nullptr, + .flags = 0, + .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter), + .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter), + .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), + .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), + .mipLodBias = tsc.GetLodBias(), + .anisotropyEnable = + static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE), + .maxAnisotropy = tsc.GetMaxAnisotropy(), + .compareEnable = tsc.depth_compare_enabled, + .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), + .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(), + .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(), + .borderColor = + arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color), + .unnormalizedCoordinates = VK_FALSE, + }); } VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 56524e6f3..dbbd0961a 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -100,16 +100,19 @@ void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer frame state.framebuffer = framebuffer; state.render_area = render_area; - VkRenderPassBeginInfo renderpass_bi; - renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderpass_bi.pNext = nullptr; - renderpass_bi.renderPass = renderpass; - renderpass_bi.framebuffer = framebuffer; - renderpass_bi.renderArea.offset.x = 0; - renderpass_bi.renderArea.offset.y = 0; - renderpass_bi.renderArea.extent = render_area; - renderpass_bi.clearValueCount = 0; - renderpass_bi.pClearValues = nullptr; + const VkRenderPassBeginInfo renderpass_bi{ + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .pNext = nullptr, + .renderPass = renderpass, + .framebuffer = framebuffer, + .renderArea = + { + .offset = {.x = 0, .y = 0}, + .extent = render_area, + }, + .clearValueCount = 0, + .pClearValues = nullptr, + }; Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { if (end_renderpass) { @@ -157,16 +160,17 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { current_cmdbuf.End(); - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = nullptr; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = nullptr; - submit_info.pWaitDstStageMask = nullptr; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = current_cmdbuf.address(); - submit_info.signalSemaphoreCount = semaphore ? 1 : 0; - submit_info.pSignalSemaphores = &semaphore; + const VkSubmitInfo submit_info{ + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = nullptr, + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .pWaitDstStageMask = nullptr, + .commandBufferCount = 1, + .pCommandBuffers = current_cmdbuf.address(), + .signalSemaphoreCount = semaphore ? 1U : 0U, + .pSignalSemaphores = &semaphore, + }; switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { case VK_SUCCESS: break; @@ -181,19 +185,18 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { void VKScheduler::AllocateNewContext() { ++ticks; - VkCommandBufferBeginInfo cmdbuf_bi; - cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmdbuf_bi.pNext = nullptr; - cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmdbuf_bi.pInheritanceInfo = nullptr; - std::unique_lock lock{mutex}; current_fence = next_fence; next_fence = &resource_manager.CommitFence(); current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), device.GetDispatchLoader()); - current_cmdbuf.Begin(cmdbuf_bi); + current_cmdbuf.Begin({ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .pNext = nullptr, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + .pInheritanceInfo = nullptr, + }); // Enable counters once again. These are disabled when a command buffer is finished. if (query_cache) { diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 112df9c71..c1a218d76 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -19,13 +19,13 @@ vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, cons const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); std::memcpy(data.get(), code_data, code_size); - VkShaderModuleCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.codeSize = code_size; - ci.pCode = data.get(); - return device.GetLogical().CreateShaderModule(ci); + return device.GetLogical().CreateShaderModule({ + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .codeSize = code_size, + .pCode = data.get(), + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 45c180221..5eca0ab91 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -71,20 +71,19 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { const u32 log2 = Common::Log2Ceil64(size); - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = 1ULL << log2; - ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | - VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - auto buffer = std::make_unique<VKBuffer>(); - buffer->handle = device.GetLogical().CreateBuffer(ci); + buffer->handle = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = 1ULL << log2, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); buffer->commit = memory_manager.Commit(buffer->handle, host_visible); auto& entries = GetCache(host_visible)[log2].entries; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index e5a583dd5..9151d9fb1 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -158,6 +158,7 @@ void StateTracker::Initialize() { SetupDirtyFrontFace(tables); SetupDirtyPrimitiveTopology(tables); SetupDirtyStencilOp(tables); + SetupDirtyStencilTestEnable(tables); } void StateTracker::InvalidateCommandBufferState() { diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 2d28a6c47..a5526a3f5 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -122,30 +122,27 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; - - VkBufferCreateInfo buffer_ci; - buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - buffer_ci.pNext = nullptr; - buffer_ci.flags = 0; - buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); - buffer_ci.usage = usage; - buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - buffer_ci.queueFamilyIndexCount = 0; - buffer_ci.pQueueFamilyIndices = nullptr; - - buffer = device.GetLogical().CreateBuffer(buffer_ci); + buffer = device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), + .usage = usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); const u32 required_flags = requirements.memoryTypeBits; stream_buffer_size = static_cast<u64>(requirements.size); - VkMemoryAllocateInfo memory_ai; - memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - memory_ai.pNext = nullptr; - memory_ai.allocationSize = requirements.size; - memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); - - memory = device.GetLogical().AllocateMemory(memory_ai); + memory = device.GetLogical().AllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = requirements.size, + .memoryTypeIndex = GetMemoryType(memory_properties, required_flags), + }); buffer.BindMemory(*memory, 0); } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index bffd8f32a..c25e312b6 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -95,15 +95,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { const auto present_queue{device.GetPresentQueue()}; bool recreated = false; - VkPresentInfoKHR present_info; - present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.pNext = nullptr; - present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U; - present_info.pWaitSemaphores = semaphores.data(); - present_info.swapchainCount = 1; - present_info.pSwapchains = swapchain.address(); - present_info.pImageIndices = &image_index; - present_info.pResults = nullptr; + const VkPresentInfoKHR present_info{ + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreCount = render_semaphore ? 2U : 1U, + .pWaitSemaphores = semaphores.data(), + .swapchainCount = 1, + .pSwapchains = swapchain.address(), + .pImageIndices = &image_index, + .pResults = nullptr, + }; switch (const VkResult result = present_queue.Present(present_info)) { case VK_SUCCESS: @@ -147,24 +148,25 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, requested_image_count = capabilities.maxImageCount; } - VkSwapchainCreateInfoKHR swapchain_ci; - swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; - swapchain_ci.pNext = nullptr; - swapchain_ci.flags = 0; - swapchain_ci.surface = surface; - swapchain_ci.minImageCount = requested_image_count; - swapchain_ci.imageFormat = surface_format.format; - swapchain_ci.imageColorSpace = surface_format.colorSpace; - swapchain_ci.imageArrayLayers = 1; - swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; - swapchain_ci.queueFamilyIndexCount = 0; - swapchain_ci.pQueueFamilyIndices = nullptr; - swapchain_ci.preTransform = capabilities.currentTransform; - swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; - swapchain_ci.presentMode = present_mode; - swapchain_ci.clipped = VK_FALSE; - swapchain_ci.oldSwapchain = nullptr; + VkSwapchainCreateInfoKHR swapchain_ci{ + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .surface = surface, + .minImageCount = requested_image_count, + .imageFormat = surface_format.format, + .imageColorSpace = surface_format.colorSpace, + .imageArrayLayers = 1, + .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .preTransform = capabilities.currentTransform, + .compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + .presentMode = present_mode, + .clipped = VK_FALSE, + .oldSwapchain = nullptr, + }; const u32 graphics_family{device.GetGraphicsFamily()}; const u32 present_family{device.GetPresentFamily()}; @@ -173,8 +175,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); swapchain_ci.pQueueFamilyIndices = queue_indices.data(); - } else { - swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; } // Request the size again to reduce the possibility of a TOCTOU race condition. @@ -200,20 +200,28 @@ void VKSwapchain::CreateSemaphores() { } void VKSwapchain::CreateImageViews() { - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - // ci.image - ci.viewType = VK_IMAGE_VIEW_TYPE_2D; - ci.format = image_format; - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - ci.subresourceRange.baseMipLevel = 0; - ci.subresourceRange.levelCount = 1; - ci.subresourceRange.baseArrayLayer = 0; - ci.subresourceRange.layerCount = 1; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_format, + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; image_views.resize(image_count); for (std::size_t i = 0; i < image_count; i++) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index bd93dcf20..9bc18c21a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -95,17 +95,18 @@ VkImageViewType GetImageViewType(SurfaceTarget target) { vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - VkBufferCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.size = static_cast<VkDeviceSize>(host_memory_size); - ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - return device.GetLogical().CreateBuffer(ci); + return device.GetLogical().CreateBuffer({ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = static_cast<VkDeviceSize>(host_memory_size), + .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }); } VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, @@ -113,15 +114,16 @@ VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, std::size_t host_memory_size) { ASSERT(params.IsBuffer()); - VkBufferViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.buffer = buffer; - ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - ci.offset = 0; - ci.range = static_cast<VkDeviceSize>(host_memory_size); - return ci; + return { + .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .buffer = buffer, + .format = + MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format, + .offset = 0, + .range = static_cast<VkDeviceSize>(host_memory_size), + }; } VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { @@ -130,23 +132,23 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP const auto [format, attachable, storage] = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); - VkImageCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.imageType = SurfaceTargetToImage(params.target); - ci.format = format; - ci.mipLevels = params.num_levels; - ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); - ci.samples = VK_SAMPLE_COUNT_1_BIT; - ci.tiling = VK_IMAGE_TILING_OPTIMAL; - ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - ci.queueFamilyIndexCount = 0; - ci.pQueueFamilyIndices = nullptr; - ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + VkImageCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = SurfaceTargetToImage(params.target), + .format = format, + .mipLevels = params.num_levels, + .arrayLayers = static_cast<u32>(params.GetNumLayers()), + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; if (attachable) { ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; @@ -321,22 +323,25 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { } VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { - VkBufferImageCopy copy; - copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); - copy.bufferRowLength = 0; - copy.bufferImageHeight = 0; - copy.imageSubresource.aspectMask = image->GetAspectMask(); - copy.imageSubresource.mipLevel = level; - copy.imageSubresource.baseArrayLayer = 0; - copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); - copy.imageOffset.x = 0; - copy.imageOffset.y = 0; - copy.imageOffset.z = 0; - copy.imageExtent.width = params.GetMipWidth(level); - copy.imageExtent.height = params.GetMipHeight(level); - copy.imageExtent.depth = - params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - return copy; + return { + .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = image->GetAspectMask(), + .mipLevel = level, + .baseArrayLayer = 0, + .layerCount = static_cast<u32>(params.GetNumLayers()), + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = params.GetMipWidth(level), + .height = params.GetMipHeight(level), + .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, + }, + }; } VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { @@ -416,20 +421,29 @@ VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSourc ASSERT(num_slices == params.depth); } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.viewType = image_view_type; - ci.format = surface.GetImage().GetFormat(); - ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; - ci.subresourceRange.aspectMask = aspect; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; - ci.subresourceRange.baseArrayLayer = base_layer; - ci.subresourceRange.layerCount = num_layers; - image_view = device.GetLogical().CreateImageView(ci); + image_view = device.GetLogical().CreateImageView({ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .viewType = image_view_type, + .format = surface.GetImage().GetFormat(), + .components = + { + .r = swizzle[0], + .g = swizzle[1], + .b = swizzle[2], + .a = swizzle[3], + }, + .subresourceRange = + { + .aspectMask = aspect, + .baseMipLevel = base_level, + .levelCount = num_levels, + .baseArrayLayer = base_layer, + .layerCount = num_layers, + }, + }); return last_image_view = *image_view; } @@ -439,17 +453,26 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } - VkImageViewCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.image = surface.GetImageHandle(); - ci.format = surface.GetImage().GetFormat(); - ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; - ci.subresourceRange.aspectMask = aspect_mask; - ci.subresourceRange.baseMipLevel = base_level; - ci.subresourceRange.levelCount = num_levels; + VkImageViewCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .image = surface.GetImageHandle(), + .format = surface.GetImage().GetFormat(), + .components = + { + .r = VK_COMPONENT_SWIZZLE_IDENTITY, + .g = VK_COMPONENT_SWIZZLE_IDENTITY, + .b = VK_COMPONENT_SWIZZLE_IDENTITY, + .a = VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = + { + .aspectMask = aspect_mask, + .baseMipLevel = base_level, + .levelCount = num_levels, + }, + }; if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; ci.subresourceRange.baseArrayLayer = base_slice; @@ -502,24 +525,40 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageCopy copy; - copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); - copy.srcSubresource.mipLevel = copy_params.source_level; - copy.srcSubresource.baseArrayLayer = copy_params.source_z; - copy.srcSubresource.layerCount = num_layers; - copy.srcOffset.x = copy_params.source_x; - copy.srcOffset.y = copy_params.source_y; - copy.srcOffset.z = 0; - copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); - copy.dstSubresource.mipLevel = copy_params.dest_level; - copy.dstSubresource.baseArrayLayer = dst_base_layer; - copy.dstSubresource.layerCount = num_layers; - copy.dstOffset.x = copy_params.dest_x; - copy.dstOffset.y = copy_params.dest_y; - copy.dstOffset.z = dst_offset_z; - copy.extent.width = copy_params.width; - copy.extent.height = copy_params.height; - copy.extent.depth = extent_z; + const VkImageCopy copy{ + .srcSubresource = + { + .aspectMask = src_surface->GetAspectMask(), + .mipLevel = copy_params.source_level, + .baseArrayLayer = copy_params.source_z, + .layerCount = num_layers, + }, + .srcOffset = + { + .x = static_cast<s32>(copy_params.source_x), + .y = static_cast<s32>(copy_params.source_y), + .z = 0, + }, + .dstSubresource = + { + .aspectMask = dst_surface->GetAspectMask(), + .mipLevel = copy_params.dest_level, + .baseArrayLayer = dst_base_layer, + .layerCount = num_layers, + }, + .dstOffset = + { + .x = static_cast<s32>(copy_params.dest_x), + .y = static_cast<s32>(copy_params.dest_y), + .z = static_cast<s32>(dst_offset_z), + }, + .extent = + { + .width = copy_params.width, + .height = copy_params.height, + .depth = extent_z, + }, + }; const VkImage src_image = src_surface->GetImageHandle(); const VkImage dst_image = dst_surface->GetImageHandle(); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 051298cc8..14cac38ea 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -377,24 +377,26 @@ VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffe Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions, InstanceDispatch& dld) noexcept { - VkApplicationInfo application_info; - application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; - application_info.pNext = nullptr; - application_info.pApplicationName = "yuzu Emulator"; - application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.pEngineName = "yuzu Emulator"; - application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); - application_info.apiVersion = VK_API_VERSION_1_1; - - VkInstanceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; - ci.pApplicationInfo = &application_info; - ci.enabledLayerCount = layers.size(); - ci.ppEnabledLayerNames = layers.data(); - ci.enabledExtensionCount = extensions.size(); - ci.ppEnabledExtensionNames = extensions.data(); + static constexpr VkApplicationInfo application_info{ + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = nullptr, + .pApplicationName = "yuzu Emulator", + .applicationVersion = VK_MAKE_VERSION(0, 1, 0), + .pEngineName = "yuzu Emulator", + .engineVersion = VK_MAKE_VERSION(0, 1, 0), + .apiVersion = VK_API_VERSION_1_1, + }; + + const VkInstanceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .pApplicationInfo = &application_info, + .enabledLayerCount = layers.size(), + .ppEnabledLayerNames = layers.data(), + .enabledExtensionCount = extensions.size(), + .ppEnabledExtensionNames = extensions.data(), + }; VkInstance instance; if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { @@ -425,19 +427,20 @@ std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices( DebugCallback Instance::TryCreateDebugCallback( PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { - VkDebugUtilsMessengerCreateInfoEXT ci; - ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; - ci.pNext = nullptr; - ci.flags = 0; - ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; - ci.pfnUserCallback = callback; - ci.pUserData = nullptr; + const VkDebugUtilsMessengerCreateInfoEXT ci{ + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .pNext = nullptr, + .flags = 0, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = callback, + .pUserData = nullptr, + }; VkDebugUtilsMessengerEXT messenger; if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { @@ -468,12 +471,13 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c } CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { - VkCommandBufferAllocateInfo ai; - ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - ai.pNext = nullptr; - ai.commandPool = handle; - ai.level = level; - ai.commandBufferCount = static_cast<u32>(num_buffers); + const VkCommandBufferAllocateInfo ai{ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = nullptr, + .commandPool = handle, + .level = level, + .commandBufferCount = static_cast<u32>(num_buffers), + }; std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers); switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { @@ -497,17 +501,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const { Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, Span<const char*> enabled_extensions, const void* next, DeviceDispatch& dld) noexcept { - VkDeviceCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - ci.pNext = next; - ci.flags = 0; - ci.queueCreateInfoCount = queues_ci.size(); - ci.pQueueCreateInfos = queues_ci.data(); - ci.enabledLayerCount = 0; - ci.ppEnabledLayerNames = nullptr; - ci.enabledExtensionCount = enabled_extensions.size(); - ci.ppEnabledExtensionNames = enabled_extensions.data(); - ci.pEnabledFeatures = nullptr; + const VkDeviceCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pNext = next, + .flags = 0, + .queueCreateInfoCount = queues_ci.size(), + .pQueueCreateInfos = queues_ci.data(), + .enabledLayerCount = 0, + .ppEnabledLayerNames = nullptr, + .enabledExtensionCount = enabled_extensions.size(), + .ppEnabledExtensionNames = enabled_extensions.data(), + .pEnabledFeatures = nullptr, + }; VkDevice device; if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { @@ -548,10 +553,11 @@ ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { } Semaphore Device::CreateSemaphore() const { - VkSemaphoreCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkSemaphoreCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; VkSemaphore object; Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); @@ -639,10 +645,12 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons } Event Device::CreateEvent() const { - VkEventCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; - ci.pNext = nullptr; - ci.flags = 0; + static constexpr VkEventCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + }; + VkEvent object; Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); return Event(object, handle, *dld); diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..b7f66d7ee --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp @@ -0,0 +1,181 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <chrono> +#include <condition_variable> +#include <mutex> +#include <thread> +#include <vector> +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_base.h" +#include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/shader/async_shaders.h" + +namespace VideoCommon::Shader { + +AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} + +AsyncShaders::~AsyncShaders() { + KillWorkers(); +} + +void AsyncShaders::AllocateWorkers(std::size_t num_workers) { + // If we're already have workers queued or don't want to queue workers, ignore + if (num_workers == worker_threads.size() || num_workers == 0) { + return; + } + + // If workers already exist, clear them + if (!worker_threads.empty()) { + FreeWorkers(); + } + + // Create workers + for (std::size_t i = 0; i < num_workers; i++) { + context_list.push_back(emu_window.CreateSharedContext()); + worker_threads.push_back(std::move( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); + } +} + +void AsyncShaders::FreeWorkers() { + // Mark all threads to quit + is_thread_exiting.store(true); + cv.notify_all(); + for (auto& thread : worker_threads) { + thread.join(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +void AsyncShaders::KillWorkers() { + is_thread_exiting.store(true); + for (auto& thread : worker_threads) { + thread.detach(); + } + // Clear our shared contexts + context_list.clear(); + + // Clear our worker threads + worker_threads.clear(); +} + +bool AsyncShaders::HasWorkQueued() { + return !pending_queue.empty(); +} + +bool AsyncShaders::HasCompletedWork() { + std::shared_lock lock{completed_mutex}; + return !finished_work.empty(); +} + +bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { + const auto& regs = gpu.Maxwell3D().regs; + + // If something is using depth, we can assume that games are not rendering anything which will + // be used one time. + if (regs.zeta_enable) { + return true; + } + + // If games are using a small index count, we can assume these are full screen quads. Usually + // these shaders are only used once for building textures so we can assume they can't be built + // async + if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { + return false; + } + + return true; +} + +std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { + std::vector<AsyncShaders::Result> results; + { + std::unique_lock lock{completed_mutex}; + results.assign(std::make_move_iterator(finished_work.begin()), + std::make_move_iterator(finished_work.end())); + finished_work.clear(); + } + return results; +} + +void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, + Tegra::Engines::ShaderType shader_type, u64 uid, + std::vector<u64> code, std::vector<u64> code_b, + u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, + VAddr cpu_addr) { + WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM + : AsyncShaders::Backend::OpenGL, + device, + shader_type, + uid, + std::move(code), + std::move(code_b), + main_offset, + compiler_settings, + registry, + cpu_addr}; + std::unique_lock lock(queue_mutex); + pending_queue.push_back(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { + using namespace std::chrono_literals; + while (!is_thread_exiting.load(std::memory_order_relaxed)) { + std::unique_lock lock{queue_mutex}; + cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); + if (is_thread_exiting) { + return; + } + + // Partial lock to allow all threads to read at the same time + if (!HasWorkQueued()) { + continue; + } + // Another thread beat us, just unlock and wait for the next load + if (pending_queue.empty()) { + continue; + } + // Pull work from queue + WorkerParams work = std::move(pending_queue.front()); + pending_queue.pop_front(); + + lock.unlock(); + + if (work.backend == AsyncShaders::Backend::OpenGL || + work.backend == AsyncShaders::Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + const auto scope = context->Acquire(); + auto program = + OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + Result result{}; + result.backend = work.backend; + result.cpu_address = work.cpu_address; + result.uid = work.uid; + result.code = std::move(work.code); + result.code_b = std::move(work.code_b); + result.shader_type = work.shader_type; + + if (work.backend == AsyncShaders::Backend::OpenGL) { + result.program.opengl = std::move(program->source_program); + } else if (work.backend == AsyncShaders::Backend::GLASM) { + result.program.glasm = std::move(program->assembly_program); + } + + { + std::unique_lock complete_lock(completed_mutex); + finished_work.push_back(std::move(result)); + } + } + } +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..2f5ee94ad --- /dev/null +++ b/src/video_core/shader/async_shaders.h @@ -0,0 +1,109 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <condition_variable> +#include <deque> +#include <memory> +#include <shared_mutex> +#include <thread> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" + +namespace Core::Frontend { +class EmuWindow; +class GraphicsContext; +} // namespace Core::Frontend + +namespace Tegra { +class GPU; +} + +namespace VideoCommon::Shader { + +class AsyncShaders { +public: + enum class Backend { + OpenGL, + GLASM, + }; + + struct ResultPrograms { + OpenGL::OGLProgram opengl; + OpenGL::OGLAssemblyProgram glasm; + }; + + struct Result { + u64 uid; + VAddr cpu_address; + Backend backend; + ResultPrograms program; + std::vector<u64> code; + std::vector<u64> code_b; + Tegra::Engines::ShaderType shader_type; + }; + + explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); + ~AsyncShaders(); + + /// Start up shader worker threads + void AllocateWorkers(std::size_t num_workers); + + /// Clear the shader queue and kill all worker threads + void FreeWorkers(); + + // Force end all threads + void KillWorkers(); + + /// Check to see if any shaders have actually been compiled + bool HasCompletedWork(); + + /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build + /// every shader async as some shaders are only built and executed once. We try to "guess" which + /// shader would be used only once + bool IsShaderAsync(const Tegra::GPU& gpu) const; + + /// Pulls completed compiled shaders + std::vector<Result> GetCompletedWork(); + + void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, + u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, + VideoCommon::Shader::CompilerSettings compiler_settings, + const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + +private: + void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); + + /// Check our worker queue to see if we have any work queued already + bool HasWorkQueued(); + + struct WorkerParams { + AsyncShaders::Backend backend; + OpenGL::Device device; + Tegra::Engines::ShaderType shader_type; + u64 uid; + std::vector<u64> code; + std::vector<u64> code_b; + u32 main_offset; + VideoCommon::Shader::CompilerSettings compiler_settings; + VideoCommon::Shader::Registry registry; + VAddr cpu_address; + }; + + std::condition_variable cv; + std::mutex queue_mutex; + std::shared_mutex completed_mutex; + std::atomic<bool> is_thread_exiting{}; + std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; + std::vector<std::thread> worker_threads; + std::deque<WorkerParams> pending_queue; + std::vector<AsyncShaders::Result> finished_work; + Core::Frontend::EmuWindow& emu_window; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp new file mode 100644 index 000000000..c3c71657d --- /dev/null +++ b/src/video_core/shader_notify.cpp @@ -0,0 +1,42 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/shader_notify.h" + +using namespace std::chrono_literals; + +namespace VideoCore { +namespace { +constexpr auto UPDATE_TICK = 32ms; +} + +ShaderNotify::ShaderNotify() = default; +ShaderNotify::~ShaderNotify() = default; + +std::size_t ShaderNotify::GetShadersBuilding() { + const auto now = std::chrono::high_resolution_clock::now(); + const auto diff = now - last_update; + if (diff > UPDATE_TICK) { + std::shared_lock lock(mutex); + last_updated_count = accurate_count; + } + return last_updated_count; +} + +std::size_t ShaderNotify::GetShadersBuildingAccurate() { + std::shared_lock lock{mutex}; + return accurate_count; +} + +void ShaderNotify::MarkShaderComplete() { + std::unique_lock lock{mutex}; + accurate_count--; +} + +void ShaderNotify::MarkSharderBuilding() { + std::unique_lock lock{mutex}; + accurate_count++; +} + +} // namespace VideoCore diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h new file mode 100644 index 000000000..a9c92d179 --- /dev/null +++ b/src/video_core/shader_notify.h @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <chrono> +#include <shared_mutex> +#include "common/common_types.h" + +namespace VideoCore { +class ShaderNotify { +public: + ShaderNotify(); + ~ShaderNotify(); + + std::size_t GetShadersBuilding(); + std::size_t GetShadersBuildingAccurate(); + + void MarkShaderComplete(); + void MarkSharderBuilding(); + +private: + std::size_t last_updated_count{}; + std::size_t accurate_count{}; + std::shared_mutex mutex; + std::chrono::high_resolution_clock::time_point last_update{}; +}; +} // namespace VideoCore diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index d25b99a32..805bb954b 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -661,6 +661,8 @@ void Config::ReadRendererValues() { ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), false); + ReadSettingGlobal(Settings::values.use_asynchronous_shaders, + QStringLiteral("use_asynchronous_shaders"), false); ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), true); ReadSettingGlobal(Settings::values.force_30fps_mode, QStringLiteral("force_30fps_mode"), false); @@ -1145,6 +1147,8 @@ void Config::SaveRendererValues() { WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, false); + WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), + Settings::values.use_asynchronous_shaders, false); WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); WriteSettingGlobal(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 7c0fa7ec5..ce30188cd 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -24,6 +24,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); ui->use_vsync->setEnabled(runtime_lock); ui->use_assembly_shaders->setEnabled(runtime_lock); + ui->use_asynchronous_shaders->setEnabled(runtime_lock); ui->force_30fps_mode->setEnabled(runtime_lock); ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); @@ -32,6 +33,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { static_cast<int>(Settings::values.gpu_accuracy.GetValue())); ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); + ui->use_asynchronous_shaders->setChecked( + Settings::values.use_asynchronous_shaders.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode.GetValue()); ui->anisotropic_filtering_combobox->setCurrentIndex( @@ -41,6 +44,10 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ConfigurationShared::SetPerGameSetting(ui->use_vsync, &Settings::values.use_vsync); ConfigurationShared::SetPerGameSetting(ui->use_assembly_shaders, &Settings::values.use_assembly_shaders); + ConfigurationShared::SetPerGameSetting(ui->use_asynchronous_shaders, + &Settings::values.use_asynchronous_shaders); + ConfigurationShared::SetPerGameSetting(ui->use_asynchronous_shaders, + &Settings::values.use_asynchronous_shaders); ConfigurationShared::SetPerGameSetting(ui->use_fast_gpu_time, &Settings::values.use_fast_gpu_time); ConfigurationShared::SetPerGameSetting(ui->force_30fps_mode, @@ -67,6 +74,14 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { if (Settings::values.use_assembly_shaders.UsingGlobal()) { Settings::values.use_assembly_shaders.SetValue(ui->use_assembly_shaders->isChecked()); } + if (Settings::values.use_asynchronous_shaders.UsingGlobal()) { + Settings::values.use_asynchronous_shaders.SetValue( + ui->use_asynchronous_shaders->isChecked()); + } + if (Settings::values.use_asynchronous_shaders.UsingGlobal()) { + Settings::values.use_asynchronous_shaders.SetValue( + ui->use_asynchronous_shaders->isChecked()); + } if (Settings::values.use_fast_gpu_time.UsingGlobal()) { Settings::values.use_fast_gpu_time.SetValue(ui->use_fast_gpu_time->isChecked()); } @@ -83,6 +98,10 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_assembly_shaders, ui->use_assembly_shaders); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, + ui->use_asynchronous_shaders); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, + ui->use_asynchronous_shaders); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, ui->use_fast_gpu_time); ConfigurationShared::ApplyPerGameSetting(&Settings::values.force_30fps_mode, @@ -117,6 +136,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); ui->use_assembly_shaders->setEnabled(Settings::values.use_assembly_shaders.UsingGlobal()); + ui->use_asynchronous_shaders->setEnabled( + Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); ui->force_30fps_mode->setEnabled(Settings::values.force_30fps_mode.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( @@ -128,6 +149,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ConfigurationShared::InsertGlobalItem(ui->gpu_accuracy); ui->use_vsync->setTristate(true); ui->use_assembly_shaders->setTristate(true); + ui->use_asynchronous_shaders->setTristate(true); ui->use_fast_gpu_time->setTristate(true); ui->force_30fps_mode->setTristate(true); ConfigurationShared::InsertGlobalItem(ui->anisotropic_filtering_combobox); diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 0021607ac..71e7dfe5e 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -73,6 +73,16 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_asynchronous_shaders"> + <property name="toolTip"> + <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> + </property> + <property name="text"> + <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string> + </property> + </widget> + </item> + <item> <widget class="QCheckBox" name="force_30fps_mode"> <property name="text"> <string>Force 30 FPS mode</string> diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 9f758605a..6909d65d0 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -94,6 +94,8 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" +#include "video_core/gpu.h" +#include "video_core/shader_notify.h" #include "yuzu/about_dialog.h" #include "yuzu/bootmanager.h" #include "yuzu/compatdb.h" @@ -498,6 +500,8 @@ void GMainWindow::InitializeWidgets() { message_label->setAlignment(Qt::AlignLeft); statusBar()->addPermanentWidget(message_label, 1); + shader_building_label = new QLabel(); + shader_building_label->setToolTip(tr("The amount of shaders currently being built")); emu_speed_label = new QLabel(); emu_speed_label->setToolTip( tr("Current emulation speed. Values higher or lower than 100% " @@ -510,7 +514,8 @@ void GMainWindow::InitializeWidgets() { tr("Time taken to emulate a Switch frame, not counting framelimiting or v-sync. For " "full-speed emulation this should be at most 16.67 ms.")); - for (auto& label : {emu_speed_label, game_fps_label, emu_frametime_label}) { + for (auto& label : + {shader_building_label, emu_speed_label, game_fps_label, emu_frametime_label}) { label->setVisible(false); label->setFrameStyle(QFrame::NoFrame); label->setContentsMargins(4, 0, 4, 0); @@ -1176,6 +1181,7 @@ void GMainWindow::ShutdownGame() { // Disable status bar updates status_bar_update_timer.stop(); + shader_building_label->setVisible(false); emu_speed_label->setVisible(false); game_fps_label->setVisible(false); emu_frametime_label->setVisible(false); @@ -2186,6 +2192,17 @@ void GMainWindow::UpdateStatusBar() { } auto results = Core::System::GetInstance().GetAndResetPerfStats(); + auto& shader_notify = Core::System::GetInstance().GPU().ShaderNotify(); + const auto shaders_building = shader_notify.GetShadersBuilding(); + + if (shaders_building != 0) { + shader_building_label->setText( + tr("Building: %1 shader").arg(shaders_building) + + (shaders_building != 1 ? QString::fromStdString("s") : QString::fromStdString(""))); + shader_building_label->setVisible(true); + } else { + shader_building_label->setVisible(false); + } if (Settings::values.use_frame_limit.GetValue()) { emu_speed_label->setText(tr("Speed: %1% / %2%") @@ -2315,9 +2332,12 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { if (behavior == ReinitializeKeyBehavior::Warning) { const auto res = QMessageBox::information( this, tr("Confirm Key Rederivation"), - tr("You are about to force rederive all of your keys. \nIf you do not know what this " - "means or what you are doing, \nthis is a potentially destructive action. \nPlease " - "make sure this is what you want \nand optionally make backups.\n\nThis will delete " + tr("You are about to force rederive all of your keys. \nIf you do not know what " + "this " + "means or what you are doing, \nthis is a potentially destructive action. " + "\nPlease " + "make sure this is what you want \nand optionally make backups.\n\nThis will " + "delete " "your autogenerated key files and re-run the key derivation module."), QMessageBox::StandardButtons{QMessageBox::Ok, QMessageBox::Cancel}); @@ -2628,8 +2648,8 @@ int main(int argc, char* argv[]) { #ifdef __APPLE__ // If you start a bundle (binary) on OSX without the Terminal, the working directory is "/". - // But since we require the working directory to be the executable path for the location of the - // user folder in the Qt Frontend, we need to cd into that working directory + // But since we require the working directory to be the executable path for the location of + // the user folder in the Qt Frontend, we need to cd into that working directory const std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + ".."; chdir(bin_path.c_str()); #endif diff --git a/src/yuzu/main.h b/src/yuzu/main.h index adff65fb5..59d9073ae 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -248,6 +248,7 @@ private: // Status bar elements QLabel* message_label = nullptr; + QLabel* shader_building_label = nullptr; QLabel* emu_speed_label = nullptr; QLabel* game_fps_label = nullptr; QLabel* emu_frametime_label = nullptr; diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 7773228c8..c2a2982fb 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -394,6 +394,10 @@ void Config::ReadValues() { static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1))); Settings::values.use_assembly_shaders.SetValue( sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false)); + Settings::values.use_asynchronous_shaders.SetValue( + sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); + Settings::values.use_asynchronous_shaders.SetValue( + sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); Settings::values.use_fast_gpu_time.SetValue( sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 5bed47fd7..aa9e40380 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -166,6 +166,10 @@ use_vsync = # 0 (default): Off, 1: On use_assembly_shaders = +# Whether to allow asynchronous shader building. +# 0 (default): Off, 1: On +use_asynchronous_shaders = + # Turns on the frame limiter, which will limit frames output to the target game speed # 0: Off, 1: On (default) use_frame_limit = |