diff options
23 files changed, 308 insertions, 86 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6068c7a1f..a9f68a8f2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -105,6 +105,8 @@ if (MSVC) set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) else() add_compile_options( + -fwrapv + -Werror=all -Werror=extra -Werror=missing-declarations @@ -129,7 +131,6 @@ else() if (ARCHITECTURE_x86_64) add_compile_options("-mcx16") - add_compile_options("-fwrapv") endif() if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang) diff --git a/src/core/core.cpp b/src/core/core.cpp index 2f67e60a9..e95ae80da 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -273,7 +273,8 @@ struct System::Impl { time_manager.Initialize(); is_powered_on = true; - exit_lock = false; + exit_locked = false; + exit_requested = false; microprofile_cpu[0] = MICROPROFILE_TOKEN(ARM_CPU0); microprofile_cpu[1] = MICROPROFILE_TOKEN(ARM_CPU1); @@ -398,7 +399,8 @@ struct System::Impl { } is_powered_on = false; - exit_lock = false; + exit_locked = false; + exit_requested = false; if (gpu_core != nullptr) { gpu_core->NotifyShutdown(); @@ -507,7 +509,8 @@ struct System::Impl { CpuManager cpu_manager; std::atomic_bool is_powered_on{}; - bool exit_lock = false; + bool exit_locked = false; + bool exit_requested = false; bool nvdec_active{}; @@ -943,12 +946,20 @@ const Service::Time::TimeManager& System::GetTimeManager() const { return impl->time_manager; } -void System::SetExitLock(bool locked) { - impl->exit_lock = locked; +void System::SetExitLocked(bool locked) { + impl->exit_locked = locked; } -bool System::GetExitLock() const { - return impl->exit_lock; +bool System::GetExitLocked() const { + return impl->exit_locked; +} + +void System::SetExitRequested(bool requested) { + impl->exit_requested = requested; +} + +bool System::GetExitRequested() const { + return impl->exit_requested; } void System::SetApplicationProcessBuildID(const CurrentBuildProcessID& id) { diff --git a/src/core/core.h b/src/core/core.h index c70ea1965..a9ff9315e 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -412,8 +412,11 @@ public: /// Gets an immutable reference to the Room Network. [[nodiscard]] const Network::RoomNetwork& GetRoomNetwork() const; - void SetExitLock(bool locked); - [[nodiscard]] bool GetExitLock() const; + void SetExitLocked(bool locked); + bool GetExitLocked() const; + + void SetExitRequested(bool requested); + bool GetExitRequested() const; void SetApplicationProcessBuildID(const CurrentBuildProcessID& id); [[nodiscard]] const CurrentBuildProcessID& GetApplicationProcessBuildID() const; diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp index 44e6852fe..7d2f0abb8 100644 --- a/src/core/file_sys/content_archive.cpp +++ b/src/core/file_sys/content_archive.cpp @@ -22,6 +22,10 @@ namespace FileSys { +static u8 MasterKeyIdForKeyGeneration(u8 key_generation) { + return std::max<u8>(key_generation, 1) - 1; +} + NCA::NCA(VirtualFile file_, const NCA* base_nca) : file(std::move(file_)), keys{Core::Crypto::KeyManager::Instance()} { if (file == nullptr) { @@ -41,12 +45,17 @@ NCA::NCA(VirtualFile file_, const NCA* base_nca) return; } + // Ensure we have the proper key area keys to continue. + const u8 master_key_id = MasterKeyIdForKeyGeneration(reader->GetKeyGeneration()); + if (!keys.HasKey(Core::Crypto::S128KeyType::KeyArea, master_key_id, reader->GetKeyIndex())) { + status = Loader::ResultStatus::ErrorMissingKeyAreaKey; + return; + } + RightsId rights_id{}; reader->GetRightsId(rights_id.data(), rights_id.size()); if (rights_id != RightsId{}) { // External decryption key required; provide it here. - const auto key_generation = std::max<s32>(reader->GetKeyGeneration(), 1) - 1; - u128 rights_id_u128; std::memcpy(rights_id_u128.data(), rights_id.data(), sizeof(rights_id)); @@ -57,12 +66,12 @@ NCA::NCA(VirtualFile file_, const NCA* base_nca) return; } - if (!keys.HasKey(Core::Crypto::S128KeyType::Titlekek, key_generation)) { + if (!keys.HasKey(Core::Crypto::S128KeyType::Titlekek, master_key_id)) { status = Loader::ResultStatus::ErrorMissingTitlekek; return; } - auto titlekek = keys.GetKey(Core::Crypto::S128KeyType::Titlekek, key_generation); + auto titlekek = keys.GetKey(Core::Crypto::S128KeyType::Titlekek, master_key_id); Core::Crypto::AESCipher<Core::Crypto::Key128> cipher(titlekek, Core::Crypto::Mode::ECB); cipher.Transcode(titlekey.data(), titlekey.size(), titlekey.data(), Core::Crypto::Op::Decrypt); diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index da33f0e44..e92f400de 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -341,7 +341,7 @@ void ISelfController::Exit(HLERequestContext& ctx) { void ISelfController::LockExit(HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); - system.SetExitLock(true); + system.SetExitLocked(true); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); @@ -350,10 +350,14 @@ void ISelfController::LockExit(HLERequestContext& ctx) { void ISelfController::UnlockExit(HLERequestContext& ctx) { LOG_DEBUG(Service_AM, "called"); - system.SetExitLock(false); + system.SetExitLocked(false); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ResultSuccess); + + if (system.GetExitRequested()) { + system.Exit(); + } } void ISelfController::EnterFatalSection(HLERequestContext& ctx) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 3ad668a47..d9872ecc2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp @@ -558,7 +558,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, if (multi_component) { if (info.num_derivates >= 3) { const auto offset_vec{ctx.var_alloc.Consume(offset)}; - ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yz, {}.y));", texel, texture, + ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture, coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec); return; } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 7d901c04b..34240b36f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -91,6 +91,34 @@ public: } } + explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2, + Id offset, Id lod_clamp) { + if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) { + throw LogicError("Derivates must be present"); + } + boost::container::static_vector<Id, 3> deriv_1_accum{ + ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0), + ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2), + ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0), + }; + boost::container::static_vector<Id, 3> deriv_2_accum{ + ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1), + ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3), + ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1), + }; + const Id derivates_id1{ctx.OpCompositeConstruct( + ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})}; + const Id derivates_id2{ctx.OpCompositeConstruct( + ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})}; + Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2); + if (Sirit::ValidId(offset)) { + Add(spv::ImageOperandsMask::Offset, offset); + } + if (has_lod_clamp) { + Add(spv::ImageOperandsMask::MinLod, lod_clamp); + } + } + std::span<const Id> Span() const noexcept { return std::span{operands.data(), operands.size()}; } @@ -524,8 +552,11 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id derivates, Id offset, Id lod_clamp) { const auto info{inst->Flags<IR::TextureInstInfo>()}; - const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, - offset, lod_clamp); + const auto operands = + info.num_derivates == 3 + ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp) + : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, + lod_clamp); return Emit(&EmitContext::OpImageSparseSampleExplicitLod, &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 753c62098..e593132e6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -161,7 +161,8 @@ enum class SpecialRegister : u64 { LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); return ir.Imm32(0); // This is the default value hardware returns. default: - throw NotImplementedException("S2R special register {}", special_register); + LOG_CRITICAL(Shader, "(STUBBED) Special register {}", special_register); + return ir.Imm32(0); // This is the default value hardware returns. } } } // Anonymous namespace diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 9f1b340a9..58ce0d8c2 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -14,6 +14,7 @@ namespace Tegra { constexpr u32 MacroRegistersStart = 0xE00; +constexpr u32 ComputeInline = 0x6D; DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) @@ -83,12 +84,35 @@ bool DmaPusher::Step() { dma_state.dma_get, command_list_header.size * sizeof(u32)); } } - Core::Memory::GpuGuestMemory<Tegra::CommandHeader, - Core::Memory::GuestMemoryFlags::UnsafeRead> - headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); - ProcessCommands(headers); + const auto safe_process = [&] { + Core::Memory::GpuGuestMemory<Tegra::CommandHeader, + Core::Memory::GuestMemoryFlags::SafeRead> + headers(memory_manager, dma_state.dma_get, command_list_header.size, + &command_headers); + ProcessCommands(headers); + }; + const auto unsafe_process = [&] { + Core::Memory::GpuGuestMemory<Tegra::CommandHeader, + Core::Memory::GuestMemoryFlags::UnsafeRead> + headers(memory_manager, dma_state.dma_get, command_list_header.size, + &command_headers); + ProcessCommands(headers); + }; + if (Settings::IsGPULevelHigh()) { + if (dma_state.method >= MacroRegistersStart) { + unsafe_process(); + return true; + } + if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute && + dma_state.method == ComputeInline) { + unsafe_process(); + return true; + } + safe_process(); + return true; + } + unsafe_process(); } - return true; } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 8a2784cdc..c9fab2d90 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -130,8 +130,10 @@ public: void DispatchCalls(); - void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) { + void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, + Engines::EngineTypes engine_type) { subchannels[subchannel_id] = engine; + subchannel_type[subchannel_id] = engine_type; } void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); @@ -170,6 +172,7 @@ private: const bool ib_enable{true}; ///< IB mode enabled std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; + std::array<Engines::EngineTypes, max_subchannels> subchannel_type; GPU& gpu; Core::System& system; diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 392322358..54631ee6c 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -11,6 +11,14 @@ namespace Tegra::Engines { +enum class EngineTypes : u32 { + KeplerCompute, + Maxwell3D, + Fermi2D, + MaxwellDMA, + KeplerMemory, +}; + class EngineInterface { public: virtual ~EngineInterface() = default; diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 7242d2529..21bf8aeb4 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -69,6 +69,14 @@ public: /// Binds a rasterizer to this engine. void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + GPUVAddr ExecTargetAddress() const { + return regs.dest.Address(); + } + + u32 GetUploadSize() const { + return copy_size; + } + private: void ProcessData(std::span<const u8> read_buffer); diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a38d9528a..cd61ab222 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal switch (method) { case KEPLER_COMPUTE_REG_INDEX(exec_upload): { + UploadInfo info{.upload_address = upload_address, + .exec_address = upload_state.ExecTargetAddress(), + .copy_size = upload_state.GetUploadSize()}; + uploads.push_back(info); upload_state.ProcessExec(regs.exec_upload.linear != 0); break; } case KEPLER_COMPUTE_REG_INDEX(data_upload): { + upload_address = current_dma_segment; upload_state.ProcessData(method_argument, is_last_call); break; } - case KEPLER_COMPUTE_REG_INDEX(launch): + case KEPLER_COMPUTE_REG_INDEX(launch): { + const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); + + for (auto& data : uploads) { + const GPUVAddr offset = data.exec_address - launch_desc_loc; + if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) && + memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) { + indirect_compute = {data.upload_address}; + } + } + uploads.clear(); ProcessLaunch(); + indirect_compute = std::nullopt; break; + } default: break; } @@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun u32 methods_pending) { switch (method) { case KEPLER_COMPUTE_REG_INDEX(data_upload): + upload_address = current_dma_segment; upload_state.ProcessData(base_start, amount); return; default: diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 2092e685f..735e05fb4 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -5,6 +5,7 @@ #include <array> #include <cstddef> +#include <optional> #include <vector> #include "common/bit_field.h" #include "common/common_funcs.h" @@ -36,6 +37,9 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) +#define LAUNCH_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32)) + class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); @@ -201,6 +205,10 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; + std::optional<GPUVAddr> GetIndirectComputeAddress() const { + return indirect_compute; + } + private: void ProcessLaunch(); @@ -216,6 +224,15 @@ private: MemoryManager& memory_manager; VideoCore::RasterizerInterface* rasterizer = nullptr; Upload::State upload_state; + GPUVAddr upload_address; + + struct UploadInfo { + GPUVAddr upload_address; + GPUVAddr exec_address; + u32 copy_size; + }; + std::vector<UploadInfo> uploads; + std::optional<GPUVAddr> indirect_compute{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c3696096d..06e349e43 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -257,6 +257,7 @@ u32 Maxwell3D::GetMaxCurrentVertices() { const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); num_vertices = std::max( num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); + break; } return num_vertices; } @@ -269,10 +270,13 @@ size_t Maxwell3D::EstimateIndexBufferSize() { std::numeric_limits<u32>::max()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); const size_t log2_byte_size = Common::Log2Ceil64(byte_size); + const size_t cap{GetMaxCurrentVertices() * 3 * byte_size}; + const size_t lower_cap = + std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); return std::min<size_t>( memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) / byte_size, - static_cast<size_t>(end_address - start_address)); + lower_cap); } u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 7718a09b3..6de2543b7 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: - dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel, + EngineTypes::Fermi2D); break; case EngineID::MAXWELL_B: - dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel, + EngineTypes::Maxwell3D); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel, + EngineTypes::KeplerCompute); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel, + EngineTypes::MaxwellDMA); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel, + EngineTypes::KeplerMemory); break; default: UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1ba31be88..dd03efecd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() { pipeline->SetEngine(kepler_compute, gpu_memory); pipeline->Configure(); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle()); + glDispatchComputeIndirect(static_cast<GLintptr>(offset)); + return; + } glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; has_written_global_memory |= pipeline->WritesGlobalMemory(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fe432dfe1..4f83a88e1 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -665,6 +665,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( std::move(modules), infos); } catch (const Shader::Exception& exception) { + auto hash = key.Hash(); + size_t env_index{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + env.Dump(hash, key.unique_hashes[index]); + } LOG_ERROR(Render_Vulkan, "{}", exception.what()); return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 032f694bc..01e76a82c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -463,6 +463,20 @@ void RasterizerVulkan::DispatchCompute() { pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([indirect_buffer = buffer->Handle(), + indirect_offset = offset](vk::CommandBuffer cmdbuf) { + cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset); + }); + return; + } const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 78e5a248f..c3f388d89 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -92,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdCopyImage); X(vkCmdCopyImageToBuffer); X(vkCmdDispatch); + X(vkCmdDispatchIndirect); X(vkCmdDraw); X(vkCmdDrawIndexed); X(vkCmdDrawIndirect); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index c226a2a29..049fa8038 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -203,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdCopyImage vkCmdCopyImage{}; PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; PFN_vkCmdDispatch vkCmdDispatch{}; + PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; @@ -1209,6 +1210,10 @@ public: dld->vkCmdDispatch(handle, x, y, z); } + void DispatchIndirect(VkBuffer indirect_buffer, VkDeviceSize offset) const noexcept { + dld->vkCmdDispatchIndirect(handle, indirect_buffer, offset); + } + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers, Span<VkBufferMemoryBarrier> buffer_barriers, diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 33c9fd0af..f2e6c03f0 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -2010,8 +2010,16 @@ bool GMainWindow::OnShutdownBegin() { emit EmulationStopping(); + int shutdown_time = 1000; + + if (system->DebuggerEnabled()) { + shutdown_time = 0; + } else if (system->GetExitLocked()) { + shutdown_time = 5000; + } + shutdown_timer.setSingleShot(true); - shutdown_timer.start(system->DebuggerEnabled() ? 0 : 5000); + shutdown_timer.start(shutdown_time); connect(&shutdown_timer, &QTimer::timeout, this, &GMainWindow::OnEmulationStopTimeExpired); connect(emu_thread.get(), &QThread::finished, this, &GMainWindow::OnEmulationStopped); @@ -2573,50 +2581,48 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa return; } - FileSys::VirtualFile base_romfs; - if (loader->ReadRomFS(base_romfs) != Loader::ResultStatus::Success) { - failed(); - return; - } + FileSys::VirtualFile packed_update_raw{}; + loader->ReadUpdateRaw(packed_update_raw); const auto& installed = system->GetContentProvider(); - const auto romfs_title_id = SelectRomFSDumpTarget(installed, program_id); - if (!romfs_title_id) { + u64 title_id{}; + u8 raw_type{}; + if (!SelectRomFSDumpTarget(installed, program_id, &title_id, &raw_type)) { failed(); return; } - const auto type = *romfs_title_id == program_id ? FileSys::ContentRecordType::Program - : FileSys::ContentRecordType::Data; - const auto base_nca = installed.GetEntry(*romfs_title_id, type); + const auto type = static_cast<FileSys::ContentRecordType>(raw_type); + const auto base_nca = installed.GetEntry(title_id, type); if (!base_nca) { failed(); return; } + const FileSys::NCA update_nca{packed_update_raw, nullptr}; + if (type != FileSys::ContentRecordType::Program || + update_nca.GetStatus() != Loader::ResultStatus::ErrorMissingBKTRBaseRomFS || + update_nca.GetTitleId() != FileSys::GetUpdateTitleID(title_id)) { + packed_update_raw = {}; + } + + const auto base_romfs = base_nca->GetRomFS(); + if (!base_romfs) { + failed(); + return; + } + const auto dump_dir = target == DumpRomFSTarget::Normal ? Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir) : Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "atmosphere" / "contents"; - const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id); + const auto romfs_dir = fmt::format("{:016X}/romfs", title_id); const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir); - FileSys::VirtualFile romfs; - - if (*romfs_title_id == program_id) { - const FileSys::PatchManager pm{program_id, system->GetFileSystemController(), installed}; - romfs = pm.PatchRomFS(base_nca.get(), base_romfs, type, nullptr, false); - } else { - romfs = installed.GetEntry(*romfs_title_id, type)->GetRomFS(); - } - - const auto extracted = FileSys::ExtractRomFS(romfs, FileSys::RomFSExtractionType::Full); - if (extracted == nullptr) { - failed(); - return; - } + const FileSys::PatchManager pm{title_id, system->GetFileSystemController(), installed}; + auto romfs = pm.PatchRomFS(base_nca.get(), base_romfs, type, packed_update_raw, false); const auto out = VfsFilesystemCreateDirectoryWrapper(vfs, path, FileSys::Mode::ReadWrite); @@ -2640,6 +2646,12 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa return; } + const auto extracted = FileSys::ExtractRomFS(romfs, FileSys::RomFSExtractionType::Full); + if (extracted == nullptr) { + failed(); + return; + } + const auto full = res == selections.constFirst(); const auto entry_size = CalculateRomFSEntrySize(extracted, full); @@ -3261,7 +3273,7 @@ void GMainWindow::OnPauseContinueGame() { } void GMainWindow::OnStopGame() { - if (system->GetExitLock() && !ConfirmForceLockedExit()) { + if (system->GetExitLocked() && !ConfirmForceLockedExit()) { return; } @@ -4350,28 +4362,41 @@ bool GMainWindow::CheckSystemArchiveDecryption() { return mii_nca->GetRomFS().get() != nullptr; } -std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, - u64 program_id) { - const auto dlc_entries = - installed.ListEntriesFilter(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data); - std::vector<FileSys::ContentProviderEntry> dlc_match; - dlc_match.reserve(dlc_entries.size()); - std::copy_if(dlc_entries.begin(), dlc_entries.end(), std::back_inserter(dlc_match), - [&program_id, &installed](const FileSys::ContentProviderEntry& entry) { - return FileSys::GetBaseTitleID(entry.title_id) == program_id && - installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success; - }); - - std::vector<u64> romfs_tids; - romfs_tids.push_back(program_id); - for (const auto& entry : dlc_match) { - romfs_tids.push_back(entry.title_id); - } - - if (romfs_tids.size() > 1) { - QStringList list{QStringLiteral("Base")}; - for (std::size_t i = 1; i < romfs_tids.size(); ++i) { - list.push_back(QStringLiteral("DLC %1").arg(romfs_tids[i] & 0x7FF)); +bool GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, u64 program_id, + u64* selected_title_id, u8* selected_content_record_type) { + using ContentInfo = std::pair<FileSys::TitleType, FileSys::ContentRecordType>; + boost::container::flat_map<u64, ContentInfo> available_title_ids; + + const auto RetrieveEntries = [&](FileSys::TitleType title_type, + FileSys::ContentRecordType record_type) { + const auto entries = installed.ListEntriesFilter(title_type, record_type); + for (const auto& entry : entries) { + if (FileSys::GetBaseTitleID(entry.title_id) == program_id && + installed.GetEntry(entry)->GetStatus() == Loader::ResultStatus::Success) { + available_title_ids[entry.title_id] = {title_type, record_type}; + } + } + }; + + RetrieveEntries(FileSys::TitleType::Application, FileSys::ContentRecordType::Program); + RetrieveEntries(FileSys::TitleType::AOC, FileSys::ContentRecordType::Data); + + if (available_title_ids.empty()) { + return false; + } + + size_t title_index = 0; + + if (available_title_ids.size() > 1) { + QStringList list; + for (auto& [title_id, content_info] : available_title_ids) { + const auto hex_title_id = QString::fromStdString(fmt::format("{:X}", title_id)); + if (content_info.first == FileSys::TitleType::Application) { + list.push_back(QStringLiteral("Application [%1]").arg(hex_title_id)); + } else { + list.push_back( + QStringLiteral("DLC %1 [%2]").arg(title_id & 0x7FF).arg(hex_title_id)); + } } bool ok; @@ -4379,13 +4404,16 @@ std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProv this, tr("Select RomFS Dump Target"), tr("Please select which RomFS you would like to dump."), list, 0, false, &ok); if (!ok) { - return {}; + return false; } - return romfs_tids[list.indexOf(res)]; + title_index = list.indexOf(res); } - return program_id; + const auto selected_info = available_title_ids.nth(title_index); + *selected_title_id = selected_info->first; + *selected_content_record_type = static_cast<u8>(selected_info->second.second); + return true; } bool GMainWindow::ConfirmClose() { @@ -4515,6 +4543,8 @@ void GMainWindow::RequestGameExit() { auto applet_ae = sm.GetService<Service::AM::AppletAE>("appletAE"); bool has_signalled = false; + system->SetExitRequested(true); + if (applet_oe != nullptr) { applet_oe->GetMessageQueue()->RequestExit(); has_signalled = true; diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 1b7055122..668dbc3b1 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -375,7 +375,8 @@ private: void RemoveAllTransferableShaderCaches(u64 program_id); void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); void RemoveCacheStorage(u64 program_id); - std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); + bool SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id, + u64* selected_title_id, u8* selected_content_record_type); InstallResult InstallNSPXCI(const QString& filename); InstallResult InstallNCA(const QString& filename); void MigrateConfigFiles(); |