28 files changed, 227 insertions, 101 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9d0af02fd..e40e9b0a5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@ if (MSVC)
 else()
     add_compile_options(
         -Wall
+        -Werror=reorder
         -Wno-attributes
     )
 
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index e226e9711..e77e82b8d 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -348,6 +348,12 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
         if (ext_dir != nullptr)
             layers_ext.push_back(std::move(ext_dir));
     }
+
+    // When there are no layers to apply, return early as there is no need to rebuild the RomFS
+    if (layers.empty() && layers_ext.empty()) {
+        return;
+    }
+
     layers.push_back(std::move(extracted));
 
     auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index e47f1deed..014d647cf 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -103,7 +103,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
 
 struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
-        : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
+        : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
 
     void Initialize(KernelCore& kernel) {
         Shutdown();
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 6aadb3ea8..7938b4b80 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -27,7 +27,7 @@ public:
             {10110, nullptr, "GetFriendProfileImage"},
             {10200, nullptr, "SendFriendRequestForApplication"},
             {10211, nullptr, "AddFacedFriendRequestForApplication"},
-            {10400, nullptr, "GetBlockedUserListIds"},
+            {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
             {10500, nullptr, "GetProfileList"},
             {10600, nullptr, "DeclareOpenOnlinePlaySession"},
             {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -121,6 +121,15 @@ private:
     };
     static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
 
+    void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) {
+        // This is safe to stub, as there should be no adverse consequences from reporting no
+        // blocked users.
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(0); // Indicates there are no blocked users
+    }
+
     void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
         // Stub used by Splatoon 2
         LOG_WARNING(Service_ACC, "(STUBBED) called");
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 2977a7d81..5cf6a4cc3 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -303,6 +303,10 @@ public:
                 return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
             }
 
+            bool IsConstant() const {
+                return constant;
+            }
+
             bool IsValid() const {
                 return size != Size::Invalid;
             }
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c66c66f6c..5e9cfba22 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1006,6 +1006,12 @@ union Instruction {
     } stg;
 
     union {
+        BitField<23, 3, AtomicOp> operation;
+        BitField<48, 1, u64> extended;
+        BitField<20, 3, GlobalAtomicType> type;
+    } red;
+
+    union {
         BitField<52, 4, AtomicOp> operation;
         BitField<49, 3, GlobalAtomicType> type;
         BitField<28, 20, s64> offset;
@@ -1787,6 +1793,7 @@ public:
         ST_S,
         ST,    // Store in generic memory
         STG,   // Store in global memory
+        RED,   // Reduction operation
         ATOM,  // Atomic operation on global memory
         ATOMS, // Atomic operation on shared memory
         AL2P,  // Transforms attribute memory into physical memory
@@ -1871,7 +1878,8 @@ public:
         ICMP_R,
         ICMP_CR,
         ICMP_IMM,
-        FCMP_R,
+        FCMP_RR,
+        FCMP_RC,
         MUFU,  // Multi-Function Operator
         RRO_C, // Range Reduction Operator
         RRO_R,
@@ -2096,6 +2104,7 @@ private:
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
            INST("101-------------", Id::ST, Type::Memory, "ST"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+            INST("1110101111111---", Id::RED, Type::Memory, "RED"),
            INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
@@ -2179,7 +2188,8 @@ private:
            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
            INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
            INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
-            INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
+            INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
+            INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
            INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
            INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
            INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index cc434faf7..20e73a37e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -12,8 +12,9 @@ namespace VideoCommon {
 
 GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
                      std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
-      cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
+    : GPU(system, std::move(renderer_), true), gpu_thread{system},
+      cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
+      gpu_context(std::move(context)) {}
 
 GPUAsynch::~GPUAsynch() = default;
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c286502ba..d83dca25a 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -87,7 +87,7 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
     std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
 
-    static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4};
+    static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
     const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
     const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
     const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f31d960c7..f4598fbf7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() {
         const auto attrib = gpu.regs.vertex_attrib_format[index];
         const auto gl_index = static_cast<GLuint>(index);
 
-        // Ignore invalid attributes.
-        if (!attrib.IsValid()) {
+        // Disable constant attributes.
+        if (attrib.IsConstant()) {
             glDisableVertexAttribArray(gl_index);
             continue;
         }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6d2ff20f9..12c6dcfde 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,6 +34,8 @@ namespace OpenGL {
 
 using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::CompileDepth;
+using VideoCommon::Shader::CompilerSettings;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
 
@@ -43,7 +45,7 @@ namespace {
 constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;
 
-constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
+constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile};
 
 /// Gets the address for the specified shader stage program
 GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 1f1f01313..b1804e9ea 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1821,13 +1821,15 @@ private:
     Expression HMergeH0(Operation operation) {
         const std::string dest = VisitOperand(operation, 0).AsUint();
         const std::string src = VisitOperand(operation, 1).AsUint();
-        return {fmt::format("bitfieldInsert({}, {}, 0, 16)", dest, src), Type::Uint};
+        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
+                Type::HalfFloat};
     }
 
     Expression HMergeH1(Operation operation) {
         const std::string dest = VisitOperand(operation, 0).AsUint();
         const std::string src = VisitOperand(operation, 1).AsUint();
-        return {fmt::format("bitfieldInsert({}, {}, 16, 16)", dest, src), Type::Uint};
+        return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
+                Type::HalfFloat};
     }
 
     Expression HPack2(Operation operation) {
@@ -2117,8 +2119,14 @@ private:
             return {};
         }
         return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
-                            Visit(operation[1]).As(type)),
-                type};
+                            Visit(operation[1]).AsUint()),
+                Type::Uint};
+    }
+
+    template <const std::string_view& opname, Type type>
+    Expression Reduce(Operation operation) {
+        code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
+        return {};
     }
 
     Expression Branch(Operation operation) {
@@ -2477,6 +2485,20 @@ private:
         &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
         &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
 
+        &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
+        &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
+
+        &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::And, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
+        &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
+
         &GLSLDecompiler::Branch,
         &GLSLDecompiler::BranchIndirect,
         &GLSLDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 0b4d999d7..2729d1265 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -417,7 +417,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
 
     switch (params.target) {
     case SurfaceTarget::Texture2DArray:
-        glFramebufferTexture(target, attachment, GetTexture(), params.base_level);
+        glFramebufferTexture(target, attachment, GetTexture(), 0);
         break;
     default:
         UNIMPLEMENTED();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f1a28cc21..b2a179746 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -315,8 +315,8 @@ public:
 
 RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
                                Core::Frontend::GraphicsContext& context)
-    : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
-      frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {}
+    : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
+      has_debug_tool{HasDebugTool()} {}
 
 RendererOpenGL::~RendererOpenGL() = default;
 
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 143478863..8681b821f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -360,6 +360,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
         default:
             break;
         }
+        break;
     case Maxwell::VertexAttribute::Type::UnsignedInt:
         switch (size) {
         case Maxwell::VertexAttribute::Size::Size_8:
@@ -370,6 +371,14 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
            return VK_FORMAT_R8G8_UINT;
        case Maxwell::VertexAttribute::Size::Size_8_8_8:
            return VK_FORMAT_R8G8B8_UINT;
        case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
            return VK_FORMAT_R8G8B8A8_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16:
+            return VK_FORMAT_R16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16:
+            return VK_FORMAT_R16G16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16:
+            return VK_FORMAT_R16G16B16_UINT;
+        case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+            return VK_FORMAT_R16G16B16A16_UINT;
        case Maxwell::VertexAttribute::Size::Size_32:
            return VK_FORMAT_R32_UINT;
        case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -381,6 +390,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
         default:
             break;
         }
+        break;
     case Maxwell::VertexAttribute::Type::UnsignedScaled:
         switch (size) {
         case Maxwell::VertexAttribute::Size::Size_8:
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 21644a7e7..fbd406f2b 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -535,7 +535,9 @@ void VKBlitScreen::CreateGraphicsPipeline() {
     viewport_state_ci.pNext = nullptr;
     viewport_state_ci.flags = 0;
     viewport_state_ci.viewportCount = 1;
+    viewport_state_ci.pViewports = nullptr;
     viewport_state_ci.scissorCount = 1;
+    viewport_state_ci.pScissors = nullptr;
 
     VkPipelineRasterizationStateCreateInfo rasterization_ci;
     rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 35ee54d30..5b6858e9b 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -32,7 +32,7 @@ public:
      * memory. When passing false, it will try to allocate device local memory.
     * @returns A memory commit.
     */
-    VKMemoryCommit Commit(const VkMemoryRequirements& reqs, bool host_visible);
+    VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible);
 
     /// Commits memory required by the buffer and binds it.
     VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 33cbc0bb6..774ba1f26 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -62,13 +62,16 @@ constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::Sha
 VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
     const auto& src = regs.viewport_transform[index];
+    const float width = src.scale_x * 2.0f;
+    const float height = src.scale_y * 2.0f;
+
     VkViewport viewport;
     viewport.x = src.translate_x - src.scale_x;
     viewport.y = src.translate_y - src.scale_y;
-    viewport.width = src.scale_x * 2.0f;
-    viewport.height = src.scale_y * 2.0f;
+    viewport.width = width != 0.0f ? width : 1.0f;
+    viewport.height = height != 0.0f ? height : 1.0f;
 
-    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
+    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
     viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
     viewport.maxDepth = src.translate_z + src.scale_z;
     if (!device.IsExtDepthRangeUnrestrictedSupported()) {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 62e4ca488..aaa138f52 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1938,11 +1938,8 @@ private:
         return {};
     }
 
-    template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type,
-              Type value_type = result_type>
+    template <Id (Module::*func)(Id, Id, Id, Id, Id)>
     Expression Atomic(Operation operation) {
-        const Id type_def = GetTypeDefinition(result_type);
-
         Id pointer;
         if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
             pointer = GetSharedMemoryPointer(*smem);
@@ -1950,15 +1947,19 @@ private:
             pointer = GetGlobalMemoryPointer(*gmem);
         } else {
             UNREACHABLE();
-            return {Constant(type_def, 0), result_type};
+            return {v_float_zero, Type::Float};
         }
-
-        const Id value = As(Visit(operation[1]), value_type);
-
         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
-        const Id semantics = Constant(type_def, 0);
+        const Id semantics = Constant(t_uint, 0);
+        const Id value = AsUint(Visit(operation[1]));
+
+        return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
+    }
 
-        return {(this->*func)(type_def, pointer, scope, semantics, value), result_type};
+    template <Id (Module::*func)(Id, Id, Id, Id, Id)>
+    Expression Reduce(Operation operation) {
+        Atomic<func>(operation);
+        return {};
     }
 
     Expression Branch(Operation operation) {
@@ -2547,21 +2548,35 @@ private:
         &SPIRVDecompiler::AtomicImageXor,
         &SPIRVDecompiler::AtomicImageExchange,
 
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>,
-
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>,
-        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
+
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
+
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
+
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
+        &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
 
         &SPIRVDecompiler::Branch,
         &SPIRVDecompiler::BranchIndirect,
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 2e2711350..6d313963a 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -484,17 +484,17 @@ bool TryInspectAddress(CFGRebuildState& state) {
    }
    case BlockCollision::Inside: {
        // This case is the tricky one:
-        // We need to Split the block in 2 sepparate blocks
+        // We need to split the block into 2 separate blocks
        const u32 end = state.block_info[block_index].end;
        BlockInfo& new_block = CreateBlockInfo(state, address, end);
        BlockInfo& current_block = state.block_info[block_index];
        current_block.end = address - 1;
-        new_block.branch = current_block.branch;
+        new_block.branch = std::move(current_block.branch);
        BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
        const auto branch = std::get_if<SingleBranch>(forward_branch.get());
        branch->address = address;
        branch->ignore = true;
-        current_block.branch = forward_branch;
+        current_block.branch = std::move(forward_branch);
        return true;
    }
    default:
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 478394682..4db329fa5 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
-    case OpCode::Id::FCMP_R: {
+    case OpCode::Id::FCMP_RR:
+    case OpCode::Id::FCMP_RC: {
         UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
         Node op_c = GetRegister(instr.gpr39);
         Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b8f63922f..8112ead3e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -3,7 +3,9 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <utility>
 #include <vector>
+
 #include <fmt/format.h>
 
 #include "common/alignment.h"
@@ -16,6 +18,7 @@
 
 namespace VideoCommon::Shader {
 
+using std::move;
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
@@ -27,29 +30,26 @@ using Tegra::Shader::StoreType;
 
 namespace {
 
-Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) {
-    const OperationCode operation_code = [op] {
-        switch (op) {
-        case AtomicOp::Add:
-            return OperationCode::AtomicIAdd;
-        case AtomicOp::Min:
-            return OperationCode::AtomicIMin;
-        case AtomicOp::Max:
-            return OperationCode::AtomicIMax;
-        case AtomicOp::And:
-            return OperationCode::AtomicIAnd;
-        case AtomicOp::Or:
-            return OperationCode::AtomicIOr;
-        case AtomicOp::Xor:
-            return OperationCode::AtomicIXor;
-        case AtomicOp::Exch:
-            return OperationCode::AtomicIExchange;
-        default:
-            UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
-            return OperationCode::AtomicIAdd;
-        }
-    }();
-    return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
+OperationCode GetAtomOperation(AtomicOp op) {
+    switch (op) {
+    case AtomicOp::Add:
+        return OperationCode::AtomicIAdd;
+    case AtomicOp::Min:
+        return OperationCode::AtomicIMin;
+    case AtomicOp::Max:
+        return OperationCode::AtomicIMax;
+    case AtomicOp::And:
+        return OperationCode::AtomicIAnd;
+    case AtomicOp::Or:
+        return OperationCode::AtomicIOr;
+    case AtomicOp::Xor:
+        return OperationCode::AtomicIXor;
+    case AtomicOp::Exch:
+        return OperationCode::AtomicIExchange;
+    default:
+        UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
+        return OperationCode::AtomicIAdd;
+    }
 }
 
 bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
@@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
 
 Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
     Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
-    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
-    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
-                     Immediate(size));
+    offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
 }
 
 Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
-    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
-    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
-    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
-                     std::move(offset), Immediate(size));
+    Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
+    offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
+    return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
+                     Immediate(size));
 }
 
 Node Sign16Extend(Node value) {
     Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
-    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
+    Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
     Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
-    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
+    return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
 }
 
 } // Anonymous namespace
@@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
 
             if (IsUnaligned(type)) {
                 const u32 mask = GetUnalignedMask(type);
-                value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
+                value = InsertUnaligned(gmem, move(value), real_address, mask, size);
             }
 
             bb.push_back(Operation(OperationCode::Assign, gmem, value));
         }
         break;
     }
+    case OpCode::Id::RED: {
+        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+        UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
+        const auto [real_address, base_address, descriptor] =
+            TrackGlobalMemory(bb, instr, true, true);
+        if (!real_address || !base_address) {
+            // Tracking failed, skip atomic.
+            break;
+        }
+        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
+        Node value = GetRegister(instr.gpr0);
+        bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
+        break;
+    }
     case OpCode::Id::ATOM: {
         UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
                                  instr.atom.operation == AtomicOp::Dec ||
                                  instr.atom.operation == AtomicOp::SafeAdd,
                              "operation={}", static_cast<int>(instr.atom.operation.Value()));
         UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
-                                 instr.atom.type == GlobalAtomicType::U64,
+                                 instr.atom.type == GlobalAtomicType::U64 ||
+                                 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
+                                 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
                              "type={}", static_cast<int>(instr.atom.type.Value()));
 
         const auto [real_address, base_address, descriptor] =
@@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }
 
         const bool is_signed =
-            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
+            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-        Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem,
-                                      GetRegister(instr.gpr20));
-        SetRegister(bb, instr.gpr0, std::move(value));
+        SetRegister(bb, instr.gpr0,
+                    SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
+                                    GetRegister(instr.gpr20)));
         break;
     }
     case OpCode::Id::ATOMS: {
@@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
         const s32 offset = instr.atoms.GetImmediateOffset();
         Node address = GetRegister(instr.gpr8);
-        address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
-        Node value =
-            GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed,
-                             GetSharedMemory(std::move(address)), GetRegister(instr.gpr20));
-        SetRegister(bb, instr.gpr0, std::move(value));
+        address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
+        SetRegister(bb, instr.gpr0,
+                    SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
+                                    GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
         break;
     }
     case OpCode::Id::AL2P: {
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 3b391d3e6..d4ffa8014 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -23,7 +23,6 @@ Node IsFull(Node shift) {
 }
 
 Node Shift(OperationCode opcode, Node value, Node shift) {
-    Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32));
     Node shifted = Operation(opcode, move(value), shift);
     return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
 }
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5fcc9da60..3eee961f5 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -178,6 +178,20 @@ enum class OperationCode {
     AtomicIOr,  /// (memory, int) -> int
     AtomicIXor, /// (memory, int) -> int
 
+    ReduceUAdd, /// (memory, uint) -> void
+    ReduceUMin, /// (memory, uint) -> void
+    ReduceUMax, /// (memory, uint) -> void
+    ReduceUAnd, /// (memory, uint) -> void
+    ReduceUOr,  /// (memory, uint) -> void
+    ReduceUXor, /// (memory, uint) -> void
+
+    ReduceIAdd, /// (memory, int) -> void
+    ReduceIMin, /// (memory, int) -> void
+    ReduceIMax, /// (memory, int) -> void
+    ReduceIAnd, /// (memory, int) -> void
+    ReduceIOr,  /// (memory, int) -> void
+    ReduceIXor, /// (memory, int) -> void
+
     Branch,         /// (uint branch_target) -> void
     BranchIndirect, /// (uint branch_target) -> void
     PushFlowStack,  /// (uint branch_target) -> void
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 7af0e792c..715f39d0d 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -248,8 +248,14 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
 
         // Use an extra temporal buffer
         auto& tmp_buffer = staging_cache.GetBuffer(1);
+        // Special case for 3D Texture Segments
+        const bool must_read_current_data =
+            params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
         tmp_buffer.resize(guest_memory_size);
         host_ptr = tmp_buffer.data();
+        if (must_read_current_data) {
+            memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
+        }
 
         if (params.is_tiled) {
             ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index a39a8661b..c5ab21f56 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -72,9 +72,9 @@ public:
         return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
+    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
         const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
-        return (gpu_addr <= other_start && other_end <= gpu_addr_end);
+        return gpu_addr <= other_start && other_end <= gpu_addr_end;
     }
 
     // Use only when recycling a surface
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
index 57a1f5803..6b5f5984b 100644
--- a/src/video_core/texture_cache/surface_view.cpp
+++ b/src/video_core/texture_cache/surface_view.cpp
@@ -20,4 +20,8 @@ bool ViewParams::operator==(const ViewParams& rhs) const {
            std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
 }
 
+bool ViewParams::operator!=(const ViewParams& rhs) const {
+    return !operator==(rhs);
+}
+
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
index b17fd11a9..90a8bb0ae 100644
--- a/src/video_core/texture_cache/surface_view.h
+++ b/src/video_core/texture_cache/surface_view.h
@@ -21,6 +21,7 @@ struct ViewParams {
     std::size_t Hash() const;
 
     bool operator==(const ViewParams& rhs) const;
+    bool operator!=(const ViewParams& rhs) const;
 
     bool IsLayered() const {
         switch (target) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index cfc7fe6e9..3e8663adf 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -509,7 +509,9 @@ private:
         }
         const auto& final_params = new_surface->GetSurfaceParams();
         if (cr_params.type != final_params.type) {
-            BufferCopy(current_surface, new_surface);
+            if (Settings::values.use_accurate_gpu_emulation) {
+                BufferCopy(current_surface, new_surface);
+            }
         } else {
             std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
             for (auto& brick : bricks) {
@@ -612,10 +614,10 @@ private:
      * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
     * the HLE methods.
    *
-     * @param overlaps   The overlapping surfaces registered in the cache.
-     * @param params     The parameters on the new surface.
-     * @param gpu_addr   The starting address of the new surface.
-     * @param cache_addr The starting address of the new surface on physical memory.
+     * @param overlaps The overlapping surfaces registered in the cache.
+     * @param params   The parameters on the new surface.
+     * @param gpu_addr The starting address of the new surface.
+     * @param cpu_addr The starting address of the new surface on physical memory.
     */
    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
                                                               const SurfaceParams& params,
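
Note on -Werror=reorder and the constructor changes in kernel.cpp, gpu_asynch.cpp and renderer_opengl.cpp: C++ always initializes non-static data members in the order they are declared in the class, regardless of the order written in the member initializer list, and GCC/Clang's -Wreorder flags any list whose order does not match. Promoting that warning to an error is presumably why this change also reorders the initializer lists of KernelCore::Impl, GPUAsynch and RendererOpenGL to follow declaration order. A minimal sketch of the hazard the warning catches (hypothetical class, not from this codebase):

    // Members are initialized in declaration order: first `a`, then `b`.
    struct Example {
        int a;
        int b;

        // -Wreorder: `b` is listed first, but `a` is still constructed first,
        // so `a{b}` would read `b` before it has been initialized.
        explicit Example(int value) : b{value}, a{b} {}
    };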
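
Note on the new RED opcode: a reduction is an atomic read-modify-write whose previous value is not returned to a register, which is presumably why the decompilers emit the same atomic operation as ATOM but discard its result (the GLSL Reduce helper prints the atomic call as a bare statement, and the SPIR-V Reduce helper throws the Atomic expression away), and why the new Reduce* operation codes in node.h are typed (memory, int) -> void. A rough CPU-side analogy using std::atomic, not anything from the emulator:

    #include <atomic>
    #include <cstdint>

    std::atomic<std::uint32_t> counter{0};

    std::uint32_t AtomLike() {
        // ATOM-style: the previous value is needed, so it is kept in a register.
        return counter.fetch_add(1);
    }

    void RedLike() {
        // RED-style: only the in-memory update matters; the old value is discarded.
        counter.fetch_add(1);
    }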
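
Note on the HMergeH0/HMergeH1 change in gl_shader_decompiler.cpp: as I read the new expressions, HMergeH0(dest, src) replaces the low half-float (H0) of dest with the low half of src, and HMergeH1 replaces the high half-float (H1) of dest with the high half of src, with the result now typed as an unpacked HalfFloat pair rather than a packed Uint. A plain C++ sketch of that merge on packed 16-bit halves (illustration only, hypothetical helper names, not code from the tree):

    #include <cstdint>

    // HMergeH0: take the low half-float (H0) from `src`, keep the high half of `dest`.
    std::uint32_t MergeH0(std::uint32_t dest, std::uint32_t src) {
        return (dest & 0xFFFF0000u) | (src & 0x0000FFFFu);
    }

    // HMergeH1: keep the low half-float of `dest`, take the high half-float (H1) from `src`.
    std::uint32_t MergeH1(std::uint32_t dest, std::uint32_t src) {
        return (src & 0xFFFF0000u) | (dest & 0x0000FFFFu);
    }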