diff options
Diffstat (limited to '')
28 files changed, 736 insertions, 477 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 5639021d3..1e8e1b215 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -125,6 +125,8 @@ add_library(common STATIC uint128.h vector_math.h web_result.h + zstd_compression.cpp + zstd_compression.h ) if(ARCHITECTURE_x86_64) @@ -138,4 +140,4 @@ endif() create_target_directory_groups(common) target_link_libraries(common PUBLIC Boost::boost fmt microprofile) -target_link_libraries(common PRIVATE lz4_static) +target_link_libraries(common PRIVATE lz4_static libzstd_static) diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp new file mode 100644 index 000000000..60a35c67c --- /dev/null +++ b/src/common/zstd_compression.cpp @@ -0,0 +1,53 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <zstd.h> + +#include "common/assert.h" +#include "common/zstd_compression.h" + +namespace Common::Compression { + +std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level) { + compression_level = std::clamp(compression_level, 1, ZSTD_maxCLevel()); + + const std::size_t max_compressed_size = ZSTD_compressBound(source_size); + std::vector<u8> compressed(max_compressed_size); + + const std::size_t compressed_size = + ZSTD_compress(compressed.data(), compressed.size(), source, source_size, compression_level); + + if (ZSTD_isError(compressed_size)) { + // Compression failed + return {}; + } + + compressed.resize(compressed_size); + + return compressed; +} + +std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size) { + return CompressDataZSTD(source, source_size, ZSTD_CLEVEL_DEFAULT); +} + +std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed) { + const std::size_t decompressed_size = + ZSTD_getDecompressedSize(compressed.data(), compressed.size()); + std::vector<u8> decompressed(decompressed_size); + + const std::size_t uncompressed_result_size = ZSTD_decompress( + decompressed.data(), decompressed.size(), compressed.data(), compressed.size()); + + if (decompressed_size != uncompressed_result_size || ZSTD_isError(uncompressed_result_size)) { + // Decompression failed + return {}; + } + return decompressed; +} + +} // namespace Common::Compression diff --git a/src/common/zstd_compression.h b/src/common/zstd_compression.h new file mode 100644 index 000000000..e0a64b035 --- /dev/null +++ b/src/common/zstd_compression.h @@ -0,0 +1,42 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <vector> + +#include "common/common_types.h" + +namespace Common::Compression { + +/** + * Compresses a source memory region with Zstandard and returns the compressed data in a vector. + * + * @param source the uncompressed source memory region. + * @param source_size the size in bytes of the uncompressed source memory region. + * @param compression_level the used compression level. Should be between 1 and 22. + * + * @return the compressed data. + */ +std::vector<u8> CompressDataZSTD(const u8* source, std::size_t source_size, s32 compression_level); + +/** + * Compresses a source memory region with Zstandard with the default compression level and returns + * the compressed data in a vector. + * + * @param source the uncompressed source memory region. + * @param source_size the size in bytes of the uncompressed source memory region. + * + * @return the compressed data. + */ +std::vector<u8> CompressDataZSTDDefault(const u8* source, std::size_t source_size); + +/** + * Decompresses a source memory region with Zstandard and returns the uncompressed data in a vector. + * + * @param compressed the compressed source memory region. + * + * @return the decompressed data. + */ +std::vector<u8> DecompressDataZSTD(const std::vector<u8>& compressed); + +} // namespace Common::Compression
\ No newline at end of file diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index f64e4c6a6..49145911b 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -163,7 +163,6 @@ MICROPROFILE_DEFINE(ARM_Jit_Dynarmic, "ARM JIT", "Dynarmic", MP_RGB(255, 64, 64) void ARM_Dynarmic::Run() { MICROPROFILE_SCOPE(ARM_Jit_Dynarmic); - ASSERT(Memory::GetCurrentPageTable() == current_page_table); jit->Run(); } @@ -278,7 +277,6 @@ void ARM_Dynarmic::ClearExclusiveState() { void ARM_Dynarmic::PageTableChanged() { jit = MakeJit(); - current_page_table = Memory::GetCurrentPageTable(); } DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index 81e0b4ac0..d867c2a50 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h @@ -12,10 +12,6 @@ #include "core/arm/exclusive_monitor.h" #include "core/arm/unicorn/arm_unicorn.h" -namespace Common { -struct PageTable; -} - namespace Core::Timing { class CoreTiming; } @@ -69,8 +65,6 @@ private: std::size_t core_index; Timing::CoreTiming& core_timing; DynarmicExclusiveMonitor& exclusive_monitor; - - Common::PageTable* current_page_table = nullptr; }; class DynarmicExclusiveMonitor final : public ExclusiveMonitor { diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index 68406eb63..ac0e1d796 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h @@ -139,10 +139,8 @@ public: context->AddDomainObject(std::move(iface)); } else { auto& kernel = Core::System::GetInstance().Kernel(); - auto sessions = + auto [server, client] = Kernel::ServerSession::CreateSessionPair(kernel, iface->GetServiceName()); - auto server = std::get<Kernel::SharedPtr<Kernel::ServerSession>>(sessions); - auto client = std::get<Kernel::SharedPtr<Kernel::ClientSession>>(sessions); iface->ClientConnected(server); context->AddMoveObject(std::move(client)); } diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp index aa432658e..744b1697d 100644 --- a/src/core/hle/kernel/client_port.cpp +++ b/src/core/hle/kernel/client_port.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <tuple> - #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/client_session.h" #include "core/hle/kernel/errors.h" @@ -31,18 +29,18 @@ ResultVal<SharedPtr<ClientSession>> ClientPort::Connect() { active_sessions++; // Create a new session pair, let the created sessions inherit the parent port's HLE handler. - auto sessions = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); + auto [server, client] = ServerSession::CreateSessionPair(kernel, server_port->GetName(), this); if (server_port->HasHLEHandler()) { - server_port->GetHLEHandler()->ClientConnected(std::get<SharedPtr<ServerSession>>(sessions)); + server_port->GetHLEHandler()->ClientConnected(server); } else { - server_port->AppendPendingSession(std::get<SharedPtr<ServerSession>>(sessions)); + server_port->AppendPendingSession(server); } // Wake the threads waiting on the ServerPort server_port->WakeupAllWaitingThreads(); - return MakeResult(std::get<SharedPtr<ClientSession>>(sessions)); + return MakeResult(client); } void ClientPort::ConnectionClosed() { diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 3f14bfa86..4d58e7c69 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -21,6 +21,7 @@ #include "core/hle/kernel/thread.h" #include "core/hle/lock.h" #include "core/hle/result.h" +#include "core/memory.h" namespace Kernel { @@ -181,6 +182,7 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) { void KernelCore::MakeCurrentProcess(Process* process) { impl->current_process = process; + Memory::SetCurrentPageTable(&process->VMManager().page_table); } Process* KernelCore::CurrentProcess() { diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 041267318..26c6b95ab 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -32,9 +32,6 @@ namespace { * @param priority The priority to give the main thread */ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { - // Setup page table so we can write to memory - Memory::SetCurrentPageTable(&owner_process.VMManager().page_table); - // Initialize new "main" thread const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index ac501bf7f..e8447b69a 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -101,7 +101,6 @@ void Scheduler::SwitchContext(Thread* new_thread) { auto* const thread_owner_process = current_thread->GetOwnerProcess(); if (previous_process != thread_owner_process) { system.Kernel().MakeCurrentProcess(thread_owner_process); - Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table); } cpu_core.LoadContext(new_thread->GetContext()); diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp index 708fdf9e1..02e7c60e6 100644 --- a/src/core/hle/kernel/server_port.cpp +++ b/src/core/hle/kernel/server_port.cpp @@ -39,9 +39,8 @@ void ServerPort::Acquire(Thread* thread) { ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); } -std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortPair( - KernelCore& kernel, u32 max_sessions, std::string name) { - +ServerPort::PortPair ServerPort::CreatePortPair(KernelCore& kernel, u32 max_sessions, + std::string name) { SharedPtr<ServerPort> server_port(new ServerPort(kernel)); SharedPtr<ClientPort> client_port(new ClientPort(kernel)); @@ -51,7 +50,7 @@ std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> ServerPort::CreatePortP client_port->max_sessions = max_sessions; client_port->active_sessions = 0; - return std::make_tuple(std::move(server_port), std::move(client_port)); + return std::make_pair(std::move(server_port), std::move(client_port)); } } // namespace Kernel diff --git a/src/core/hle/kernel/server_port.h b/src/core/hle/kernel/server_port.h index 76293cb8b..fef573b71 100644 --- a/src/core/hle/kernel/server_port.h +++ b/src/core/hle/kernel/server_port.h @@ -6,7 +6,7 @@ #include <memory> #include <string> -#include <tuple> +#include <utility> #include <vector> #include "common/common_types.h" #include "core/hle/kernel/object.h" @@ -23,6 +23,7 @@ class SessionRequestHandler; class ServerPort final : public WaitObject { public: using HLEHandler = std::shared_ptr<SessionRequestHandler>; + using PortPair = std::pair<SharedPtr<ServerPort>, SharedPtr<ClientPort>>; /** * Creates a pair of ServerPort and an associated ClientPort. @@ -32,8 +33,8 @@ public: * @param name Optional name of the ports * @return The created port tuple */ - static std::tuple<SharedPtr<ServerPort>, SharedPtr<ClientPort>> CreatePortPair( - KernelCore& kernel, u32 max_sessions, std::string name = "UnknownPort"); + static PortPair CreatePortPair(KernelCore& kernel, u32 max_sessions, + std::string name = "UnknownPort"); std::string GetTypeName() const override { return "ServerPort"; diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 40cec143e..a6b2cf06a 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -204,6 +204,6 @@ ServerSession::SessionPair ServerSession::CreateSessionPair(KernelCore& kernel, client_session->parent = parent; server_session->parent = parent; - return std::make_tuple(std::move(server_session), std::move(client_session)); + return std::make_pair(std::move(server_session), std::move(client_session)); } } // namespace Kernel diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index 3429a326f..09b835ff8 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -6,6 +6,7 @@ #include <memory> #include <string> +#include <utility> #include <vector> #include "core/hle/kernel/object.h" @@ -58,7 +59,7 @@ public: return parent.get(); } - using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; + using SessionPair = std::pair<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; /** * Creates a pair of ServerSession and an associated ClientSession. diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 332c1037c..4e0538bc2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -38,10 +38,6 @@ void SetCurrentPageTable(Common::PageTable* page_table) { } } -Common::PageTable* GetCurrentPageTable() { - return current_page_table; -} - static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, Common::PageType type) { LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, diff --git a/src/core/memory.h b/src/core/memory.h index 1d38cdca8..6845f5fe1 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -28,16 +28,6 @@ constexpr u64 PAGE_MASK = PAGE_SIZE - 1; /// Virtual user-space memory regions enum : VAddr { - /// Read-only page containing kernel and system configuration values. - CONFIG_MEMORY_VADDR = 0x1FF80000, - CONFIG_MEMORY_SIZE = 0x00001000, - CONFIG_MEMORY_VADDR_END = CONFIG_MEMORY_VADDR + CONFIG_MEMORY_SIZE, - - /// Usually read-only page containing mostly values read from hardware. - SHARED_PAGE_VADDR = 0x1FF81000, - SHARED_PAGE_SIZE = 0x00001000, - SHARED_PAGE_VADDR_END = SHARED_PAGE_VADDR + SHARED_PAGE_SIZE, - /// TLS (Thread-Local Storage) related. TLS_ENTRY_SIZE = 0x200, @@ -50,9 +40,8 @@ enum : VAddr { KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, }; -/// Currently active page table +/// Changes the currently active page table. void SetCurrentPageTable(Common::PageTable* page_table); -Common::PageTable* GetCurrentPageTable(); /// Determines if the given VAddr is valid for the specified process. bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 3e1a735c3..58af41f6e 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -17,7 +17,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} { auto process = Kernel::Process::Create(Core::System::GetInstance(), ""); - kernel.MakeCurrentProcess(process.get()); page_table = &process->VMManager().page_table; std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); @@ -28,7 +27,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_) Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); - Memory::SetCurrentPageTable(page_table); + kernel.MakeCurrentProcess(process.get()); } TestEnvironment::~TestEnvironment() { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 30b29e14d..4461083ff 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { auto& rasterizer{renderer.Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(); + memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 8417324ff..0f4e820aa 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -5,16 +5,13 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" -#include "core/core.h" #include "core/memory.h" -#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_base.h" namespace Tegra { -MemoryManager::MemoryManager() { +MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); std::fill(page_table.attributes.begin(), page_table.attributes.end(), Common::PageType::Unmapped); @@ -70,8 +67,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; - Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr, - aligned_size); + rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); return gpu_addr; @@ -204,14 +200,85 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { } void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { - std::memcpy(dest_buffer, GetPointer(src_addr), size); + std::size_t remaining_size{size}; + std::size_t page_index{src_addr >> page_bits}; + std::size_t page_offset{src_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; + + switch (page_table.attributes[page_index]) { + case Common::PageType::Memory: { + const u8* src_ptr{page_table.pointers[page_index] + page_offset}; + rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); + std::memcpy(dest_buffer, src_ptr, copy_amount); + break; + } + default: + UNREACHABLE(); + } + + page_index++; + page_offset = 0; + dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; + remaining_size -= copy_amount; + } } + void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { - std::memcpy(GetPointer(dest_addr), src_buffer, size); + std::size_t remaining_size{size}; + std::size_t page_index{dest_addr >> page_bits}; + std::size_t page_offset{dest_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; + + switch (page_table.attributes[page_index]) { + case Common::PageType::Memory: { + u8* dest_ptr{page_table.pointers[page_index] + page_offset}; + rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); + std::memcpy(dest_ptr, src_buffer, copy_amount); + break; + } + default: + UNREACHABLE(); + } + + page_index++; + page_offset = 0; + src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; + remaining_size -= copy_amount; + } } void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { - std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size); + std::size_t remaining_size{size}; + std::size_t page_index{src_addr >> page_bits}; + std::size_t page_offset{src_addr & page_mask}; + + while (remaining_size > 0) { + const std::size_t copy_amount{ + std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; + + switch (page_table.attributes[page_index]) { + case Common::PageType::Memory: { + const u8* src_ptr{page_table.pointers[page_index] + page_offset}; + rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); + WriteBlock(dest_addr, src_ptr, copy_amount); + break; + } + default: + UNREACHABLE(); + } + + page_index++; + page_offset = 0; + dest_addr += static_cast<VAddr>(copy_amount); + src_addr += static_cast<VAddr>(copy_amount); + remaining_size -= copy_amount; + } } void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, @@ -351,7 +418,7 @@ MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) { const VirtualMemoryArea& vma{vma_handle->second}; if (vma.type == VirtualMemoryArea::Type::Mapped) { // Region is already allocated - return {}; + return vma_handle; } const VAddr start_in_vma{base - vma.base}; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 178e2f655..647cbf93a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -10,6 +10,10 @@ #include "common/common_types.h" #include "common/page_table.h" +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra { /** @@ -43,7 +47,7 @@ struct VirtualMemoryArea { class MemoryManager final { public: - MemoryManager(); + MemoryManager(VideoCore::RasterizerInterface& rasterizer); GPUVAddr AllocateSpace(u64 size, u64 align); GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); @@ -144,6 +148,7 @@ private: Common::PageTable page_table{page_bits}; VMAMap vma_map; + VideoCore::RasterizerInterface& rasterizer; }; } // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a1a51f226..3ea08ef7b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -21,6 +21,8 @@ namespace OpenGL::GLShader { +namespace { + using Tegra::Shader::Attribute; using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; @@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; +enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; + +struct TextureAoffi {}; +using TextureArgument = std::pair<Type, Node>; +using TextureIR = std::variant<TextureAoffi, TextureArgument>; + enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); -enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; - class ShaderWriter { public: void AddExpression(std::string_view text) { @@ -91,7 +97,7 @@ private: }; /// Generates code to use for a swizzle operation. -static std::string GetSwizzle(u32 elem) { +std::string GetSwizzle(u32 elem) { ASSERT(elem <= 3); std::string swizzle = "."; swizzle += "xyzw"[elem]; @@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) { } /// Translate topology -static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { +std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { case Tegra::Shader::OutputTopology::PointList: return "points"; @@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { } /// Returns true if an object has to be treated as precise -static bool IsPrecise(Operation operand) { +bool IsPrecise(Operation operand) { const auto& meta = operand.GetMeta(); if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { @@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) { return false; } -static bool IsPrecise(Node node) { +bool IsPrecise(Node node) { if (const auto operation = std::get_if<OperationNode>(node)) { return IsPrecise(*operation); } @@ -723,8 +729,8 @@ private: result_type)); } - std::string GenerateTexture(Operation operation, const std::string& func, - const std::vector<std::pair<Type, Node>>& extras) { + std::string GenerateTexture(Operation operation, const std::string& function_suffix, + const std::vector<TextureIR>& extras) { constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); @@ -734,11 +740,11 @@ private: const bool has_array = meta->sampler.IsArray(); const bool has_shadow = meta->sampler.IsShadow(); - std::string expr = func; - expr += '('; - expr += GetSampler(meta->sampler); - expr += ", "; - + std::string expr = "texture" + function_suffix; + if (!meta->aoffi.empty()) { + expr += "Offset"; + } + expr += '(' + GetSampler(meta->sampler) + ", "; expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { @@ -756,36 +762,74 @@ private: } expr += ')'; - for (const auto& extra_pair : extras) { - const auto [type, operand] = extra_pair; - if (operand == nullptr) { - continue; + for (const auto& variant : extras) { + if (const auto argument = std::get_if<TextureArgument>(&variant)) { + expr += GenerateTextureArgument(*argument); + } else if (std::get_if<TextureAoffi>(&variant)) { + expr += GenerateTextureAoffi(meta->aoffi); + } else { + UNREACHABLE(); } - expr += ", "; + } - switch (type) { - case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { - // Inline the string as an immediate integer in GLSL (some extra arguments are - // required to be constant) - expr += std::to_string(static_cast<s32>(immediate->GetValue())); - } else { - expr += "ftoi(" + Visit(operand) + ')'; - } - break; - case Type::Float: - expr += Visit(operand); - break; - default: { - const auto type_int = static_cast<u32>(type); - UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); - expr += '0'; - break; + return expr + ')'; + } + + std::string GenerateTextureArgument(TextureArgument argument) { + const auto [type, operand] = argument; + if (operand == nullptr) { + return {}; + } + + std::string expr = ", "; + switch (type) { + case Type::Int: + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + // Inline the string as an immediate integer in GLSL (some extra arguments are + // required to be constant) + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(operand) + ')'; + } + break; + case Type::Float: + expr += Visit(operand); + break; + default: { + const auto type_int = static_cast<u32>(type); + UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); + expr += '0'; + break; + } + } + return expr; + } + + std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { + if (aoffi.empty()) { + return {}; + } + constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; + std::string expr = ", "; + expr += coord_constructors.at(aoffi.size() - 1); + expr += '('; + + for (std::size_t index = 0; index < aoffi.size(); ++index) { + const auto operand{aoffi.at(index)}; + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + // Inline the string as an immediate integer in GLSL (AOFFI arguments are required + // to be constant by the standard). + expr += std::to_string(static_cast<s32>(immediate->GetValue())); + } else { + expr += "ftoi(" + Visit(operand) + ')'; } + if (index + 1 < aoffi.size()) { + expr += ", "; } } + expr += ')'; - return expr + ')'; + return expr; } std::string Assign(Operation operation) { @@ -1164,7 +1208,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); + std::string expr = GenerateTexture( + operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1175,7 +1220,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); + std::string expr = GenerateTexture( + operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1187,7 +1233,8 @@ private: ASSERT(meta); const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; - return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + + return GenerateTexture(operation, "Gather", + {TextureArgument{type, meta->component}, TextureAoffi{}}) + GetSwizzle(meta->element); } @@ -1217,8 +1264,8 @@ private: ASSERT(meta); if (meta->element < 2) { - return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + - " * vec2(256))" + GetSwizzle(meta->element) + "))"; + return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + + GetSwizzle(meta->element) + "))"; } return "0"; } @@ -1571,6 +1618,8 @@ private: ShaderWriter code; }; +} // Anonymous namespace + std::string GetCommonDeclarations() { const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index d2d979997..8a43eb157 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -10,8 +10,8 @@ #include "common/common_types.h" #include "common/file_util.h" #include "common/logging/log.h" -#include "common/lz4_compression.h" #include "common/scm_rev.h" +#include "common/zstd_compression.h" #include "core/core.h" #include "core/hle/kernel/process.h" @@ -259,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length); + dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary); if (dump.binary.empty()) { return {}; } @@ -288,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn return {}; } - const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size); + const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code); if (code.empty()) { return {}; } @@ -474,8 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str if (!IsUsable()) return; - const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC( - reinterpret_cast<const u8*>(code.data()), code.size(), 9)}; + const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault( + reinterpret_cast<const u8*>(code.data()), code.size())}; if (compressed_code.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", unique_identifier); @@ -506,7 +506,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); const std::vector<u8> compressed_binary = - Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9); + Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size()); if (compressed_binary.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 8eef2a920..37dcfefdb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -62,7 +62,6 @@ public: UpdatePipeline(); state.draw.shader_program = 0; state.draw.program_pipeline = pipeline.handle; - state.geometry_shaders.enabled = (gs != 0); } private: diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 9419326a3..52d569a1b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -10,16 +10,62 @@ namespace OpenGL { -OpenGLState OpenGLState::cur_state; +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +OpenGLState OpenGLState::cur_state; bool OpenGLState::s_rgb_used; +namespace { + +template <typename T> +bool UpdateValue(T& current_value, const T new_value) { + const bool changed = current_value != new_value; + current_value = new_value; + return changed; +} + +template <typename T1, typename T2> +bool UpdateTie(T1 current_value, const T2 new_value) { + const bool changed = current_value != new_value; + current_value = new_value; + return changed; +} + +void Enable(GLenum cap, bool enable) { + if (enable) { + glEnable(cap); + } else { + glDisable(cap); + } +} + +void Enable(GLenum cap, GLuint index, bool enable) { + if (enable) { + glEnablei(cap, index); + } else { + glDisablei(cap, index); + } +} + +void Enable(GLenum cap, bool& current_value, bool new_value) { + if (UpdateValue(current_value, new_value)) + Enable(cap, new_value); +} + +void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { + if (UpdateValue(current_value, new_value)) + Enable(cap, index, new_value); +} + +} // namespace + OpenGLState::OpenGLState() { // These all match default OpenGL values - geometry_shaders.enabled = false; framebuffer_srgb.enabled = false; + multisample_control.alpha_to_coverage = false; multisample_control.alpha_to_one = false; + cull.enabled = false; cull.mode = GL_BACK; cull.front_face = GL_CCW; @@ -30,14 +76,15 @@ OpenGLState::OpenGLState() { primitive_restart.enabled = false; primitive_restart.index = 0; + for (auto& item : color_mask) { item.red_enabled = GL_TRUE; item.green_enabled = GL_TRUE; item.blue_enabled = GL_TRUE; item.alpha_enabled = GL_TRUE; } - stencil.test_enabled = false; - auto reset_stencil = [](auto& config) { + + const auto ResetStencil = [](auto& config) { config.test_func = GL_ALWAYS; config.test_ref = 0; config.test_mask = 0xFFFFFFFF; @@ -46,8 +93,10 @@ OpenGLState::OpenGLState() { config.action_depth_pass = GL_KEEP; config.action_stencil_fail = GL_KEEP; }; - reset_stencil(stencil.front); - reset_stencil(stencil.back); + stencil.test_enabled = false; + ResetStencil(stencil.front); + ResetStencil(stencil.back); + for (auto& item : viewports) { item.x = 0; item.y = 0; @@ -61,6 +110,7 @@ OpenGLState::OpenGLState() { item.scissor.width = 0; item.scissor.height = 0; } + for (auto& item : blend) { item.enabled = true; item.rgb_equation = GL_FUNC_ADD; @@ -70,11 +120,14 @@ OpenGLState::OpenGLState() { item.src_a_func = GL_ONE; item.dst_a_func = GL_ZERO; } + independant_blend.enabled = false; + blend_color.red = 0.0f; blend_color.green = 0.0f; blend_color.blue = 0.0f; blend_color.alpha = 0.0f; + logic_op.enabled = false; logic_op.operation = GL_COPY; @@ -91,9 +144,12 @@ OpenGLState::OpenGLState() { clip_distance = {}; point.size = 1; + fragment_color_clamp.enabled = false; + depth_clamp.far_plane = false; depth_clamp.near_plane = false; + polygon_offset.fill_enable = false; polygon_offset.line_enable = false; polygon_offset.point_enable = false; @@ -103,260 +159,255 @@ OpenGLState::OpenGLState() { } void OpenGLState::ApplyDefaultState() { + glEnable(GL_BLEND); glDisable(GL_FRAMEBUFFER_SRGB); glDisable(GL_CULL_FACE); glDisable(GL_DEPTH_TEST); glDisable(GL_PRIMITIVE_RESTART); glDisable(GL_STENCIL_TEST); - glEnable(GL_BLEND); glDisable(GL_COLOR_LOGIC_OP); glDisable(GL_SCISSOR_TEST); } +void OpenGLState::ApplyFramebufferState() const { + if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { + glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); + } + if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); + } +} + +void OpenGLState::ApplyVertexArrayState() const { + if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { + glBindVertexArray(draw.vertex_array); + } +} + +void OpenGLState::ApplyShaderProgram() const { + if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { + glUseProgram(draw.shader_program); + } +} + +void OpenGLState::ApplyProgramPipeline() const { + if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { + glBindProgramPipeline(draw.program_pipeline); + } +} + +void OpenGLState::ApplyClipDistances() const { + for (std::size_t i = 0; i < clip_distance.size(); ++i) { + Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], + clip_distance[i]); + } +} + +void OpenGLState::ApplyPointSize() const { + if (UpdateValue(cur_state.point.size, point.size)) { + glPointSize(point.size); + } +} + +void OpenGLState::ApplyFragmentColorClamp() const { + if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { + glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, + fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); + } +} + +void OpenGLState::ApplyMultisample() const { + Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, + multisample_control.alpha_to_coverage); + Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, + multisample_control.alpha_to_one); +} + +void OpenGLState::ApplyDepthClamp() const { + if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && + depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { + return; + } + cur_state.depth_clamp = depth_clamp; + + UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, + "Unimplemented Depth Clamp Separation!"); + + Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); +} + void OpenGLState::ApplySRgb() const { - if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) { - if (framebuffer_srgb.enabled) { - // Track if sRGB is used - s_rgb_used = true; - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); - } + if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) + return; + cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; + if (framebuffer_srgb.enabled) { + // Track if sRGB is used + s_rgb_used = true; + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); } } void OpenGLState::ApplyCulling() const { - if (cull.enabled != cur_state.cull.enabled) { - if (cull.enabled) { - glEnable(GL_CULL_FACE); - } else { - glDisable(GL_CULL_FACE); - } - } + Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); - if (cull.mode != cur_state.cull.mode) { + if (UpdateValue(cur_state.cull.mode, cull.mode)) { glCullFace(cull.mode); } - if (cull.front_face != cur_state.cull.front_face) { + if (UpdateValue(cur_state.cull.front_face, cull.front_face)) { glFrontFace(cull.front_face); } } void OpenGLState::ApplyColorMask() const { - if (independant_blend.enabled) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - const auto& updated = color_mask[i]; - const auto& current = cur_state.color_mask[i]; - if (updated.red_enabled != current.red_enabled || - updated.green_enabled != current.green_enabled || - updated.blue_enabled != current.blue_enabled || - updated.alpha_enabled != current.alpha_enabled) { - glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, - updated.blue_enabled, updated.alpha_enabled); - } - } - } else { - const auto& updated = color_mask[0]; - const auto& current = cur_state.color_mask[0]; + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { + const auto& updated = color_mask[i]; + auto& current = cur_state.color_mask[i]; if (updated.red_enabled != current.red_enabled || updated.green_enabled != current.green_enabled || updated.blue_enabled != current.blue_enabled || updated.alpha_enabled != current.alpha_enabled) { - glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled, - updated.alpha_enabled); + current = updated; + glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, + updated.blue_enabled, updated.alpha_enabled); } } } void OpenGLState::ApplyDepth() const { - if (depth.test_enabled != cur_state.depth.test_enabled) { - if (depth.test_enabled) { - glEnable(GL_DEPTH_TEST); - } else { - glDisable(GL_DEPTH_TEST); - } - } + Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); - if (depth.test_func != cur_state.depth.test_func) { + if (cur_state.depth.test_func != depth.test_func) { + cur_state.depth.test_func = depth.test_func; glDepthFunc(depth.test_func); } - if (depth.write_mask != cur_state.depth.write_mask) { + if (cur_state.depth.write_mask != depth.write_mask) { + cur_state.depth.write_mask = depth.write_mask; glDepthMask(depth.write_mask); } } void OpenGLState::ApplyPrimitiveRestart() const { - if (primitive_restart.enabled != cur_state.primitive_restart.enabled) { - if (primitive_restart.enabled) { - glEnable(GL_PRIMITIVE_RESTART); - } else { - glDisable(GL_PRIMITIVE_RESTART); - } - } + Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); - if (primitive_restart.index != cur_state.primitive_restart.index) { + if (cur_state.primitive_restart.index != primitive_restart.index) { + cur_state.primitive_restart.index = primitive_restart.index; glPrimitiveRestartIndex(primitive_restart.index); } } void OpenGLState::ApplyStencilTest() const { - if (stencil.test_enabled != cur_state.stencil.test_enabled) { - if (stencil.test_enabled) { - glEnable(GL_STENCIL_TEST); - } else { - glDisable(GL_STENCIL_TEST); - } - } - - const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) { - if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref || - config.test_mask != prev_config.test_mask) { + Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); + + const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { + if (current.test_func != config.test_func || current.test_ref != config.test_ref || + current.test_mask != config.test_mask) { + current.test_func = config.test_func; + current.test_ref = config.test_ref; + current.test_mask = config.test_mask; glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); } - if (config.action_depth_fail != prev_config.action_depth_fail || - config.action_depth_pass != prev_config.action_depth_pass || - config.action_stencil_fail != prev_config.action_stencil_fail) { + if (current.action_depth_fail != config.action_depth_fail || + current.action_depth_pass != config.action_depth_pass || + current.action_stencil_fail != config.action_stencil_fail) { + current.action_depth_fail = config.action_depth_fail; + current.action_depth_pass = config.action_depth_pass; + current.action_stencil_fail = config.action_stencil_fail; glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, config.action_depth_pass); } - if (config.write_mask != prev_config.write_mask) { + if (current.write_mask != config.write_mask) { + current.write_mask = config.write_mask; glStencilMaskSeparate(face, config.write_mask); } }; ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); } -// Viewport does not affects glClearBuffer so emulate viewport using scissor test -void OpenGLState::EmulateViewportWithScissor() { - auto& current = viewports[0]; - if (current.scissor.enabled) { - const GLint left = std::max(current.x, current.scissor.x); - const GLint right = - std::max(current.x + current.width, current.scissor.x + current.scissor.width); - const GLint bottom = std::max(current.y, current.scissor.y); - const GLint top = - std::max(current.y + current.height, current.scissor.y + current.scissor.height); - current.scissor.x = std::max(left, 0); - current.scissor.y = std::max(bottom, 0); - current.scissor.width = std::max(right - left, 0); - current.scissor.height = std::max(top - bottom, 0); - } else { - current.scissor.enabled = true; - current.scissor.x = current.x; - current.scissor.y = current.y; - current.scissor.width = current.width; - current.scissor.height = current.height; - } -} void OpenGLState::ApplyViewport() const { - if (geometry_shaders.enabled) { - for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports); - i++) { - const auto& current = cur_state.viewports[i]; - const auto& updated = viewports[i]; - if (updated.x != current.x || updated.y != current.y || - updated.width != current.width || updated.height != current.height) { - glViewportIndexedf( - i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), - static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height)); - } - if (updated.depth_range_near != current.depth_range_near || - updated.depth_range_far != current.depth_range_far) { - glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); - } - - if (updated.scissor.enabled != current.scissor.enabled) { - if (updated.scissor.enabled) { - glEnablei(GL_SCISSOR_TEST, i); - } else { - glDisablei(GL_SCISSOR_TEST, i); - } - } - - if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height) { - glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); - } - } - } else { - const auto& current = cur_state.viewports[0]; - const auto& updated = viewports[0]; - if (updated.x != current.x || updated.y != current.y || updated.width != current.width || - updated.height != current.height) { - glViewport(updated.x, updated.y, updated.width, updated.height); - } - - if (updated.depth_range_near != current.depth_range_near || - updated.depth_range_far != current.depth_range_far) { - glDepthRange(updated.depth_range_near, updated.depth_range_far); + for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { + const auto& updated = viewports[i]; + auto& current = cur_state.viewports[i]; + + if (current.x != updated.x || current.y != updated.y || current.width != updated.width || + current.height != updated.height) { + current.x = updated.x; + current.y = updated.y; + current.width = updated.width; + current.height = updated.height; + glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), + static_cast<GLfloat>(updated.width), + static_cast<GLfloat>(updated.height)); } - - if (updated.scissor.enabled != current.scissor.enabled) { - if (updated.scissor.enabled) { - glEnable(GL_SCISSOR_TEST); - } else { - glDisable(GL_SCISSOR_TEST); - } + if (current.depth_range_near != updated.depth_range_near || + current.depth_range_far != updated.depth_range_far) { + current.depth_range_near = updated.depth_range_near; + current.depth_range_far = updated.depth_range_far; + glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); } - if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y || - updated.scissor.width != current.scissor.width || - updated.scissor.height != current.scissor.height) { - glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); + Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled); + + if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y || + current.scissor.width != updated.scissor.width || + current.scissor.height != updated.scissor.height) { + current.scissor.x = updated.scissor.x; + current.scissor.y = updated.scissor.y; + current.scissor.width = updated.scissor.width; + current.scissor.height = updated.scissor.height; + glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, + updated.scissor.height); } } } void OpenGLState::ApplyGlobalBlending() const { - const Blend& current = cur_state.blend[0]; const Blend& updated = blend[0]; - if (updated.enabled != current.enabled) { - if (updated.enabled) { - glEnable(GL_BLEND); - } else { - glDisable(GL_BLEND); - } - } - if (!updated.enabled) { - return; - } - if (updated.src_rgb_func != current.src_rgb_func || - updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || - updated.dst_a_func != current.dst_a_func) { + Blend& current = cur_state.blend[0]; + + Enable(GL_BLEND, current.enabled, updated.enabled); + + if (current.src_rgb_func != updated.src_rgb_func || + current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func || + current.dst_a_func != updated.dst_a_func) { + current.src_rgb_func = updated.src_rgb_func; + current.dst_rgb_func = updated.dst_rgb_func; + current.src_a_func = updated.src_a_func; + current.dst_a_func = updated.dst_a_func; glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { + if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) { + current.rgb_equation = updated.rgb_equation; + current.a_equation = updated.a_equation; glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); } } void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { const Blend& updated = blend[target]; - const Blend& current = cur_state.blend[target]; - if (updated.enabled != current.enabled || force) { - if (updated.enabled) { - glEnablei(GL_BLEND, static_cast<GLuint>(target)); - } else { - glDisablei(GL_BLEND, static_cast<GLuint>(target)); - } + Blend& current = cur_state.blend[target]; + + if (current.enabled != updated.enabled || force) { + current.enabled = updated.enabled; + Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled); } - if (updated.src_rgb_func != current.src_rgb_func || - updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func || - updated.dst_a_func != current.dst_a_func) { + if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func, + current.dst_a_func), + std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, + updated.dst_a_func))) { glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); } - if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) { + if (UpdateTie(std::tie(current.rgb_equation, current.a_equation), + std::tie(updated.rgb_equation, updated.a_equation))) { glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, updated.a_equation); } @@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const { void OpenGLState::ApplyBlending() const { if (independant_blend.enabled) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - ApplyTargetBlending(i, - independant_blend.enabled != cur_state.independant_blend.enabled); + const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; + for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { + ApplyTargetBlending(target, force); } } else { ApplyGlobalBlending(); } - if (blend_color.red != cur_state.blend_color.red || - blend_color.green != cur_state.blend_color.green || - blend_color.blue != cur_state.blend_color.blue || - blend_color.alpha != cur_state.blend_color.alpha) { + cur_state.independant_blend.enabled = independant_blend.enabled; + + if (UpdateTie( + std::tie(cur_state.blend_color.red, cur_state.blend_color.green, + cur_state.blend_color.blue, cur_state.blend_color.alpha), + std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) { glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); } } void OpenGLState::ApplyLogicOp() const { - if (logic_op.enabled != cur_state.logic_op.enabled) { - if (logic_op.enabled) { - glEnable(GL_COLOR_LOGIC_OP); - } else { - glDisable(GL_COLOR_LOGIC_OP); - } - } + Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); - if (logic_op.operation != cur_state.logic_op.operation) { + if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { glLogicOp(logic_op.operation); } } void OpenGLState::ApplyPolygonOffset() const { - const bool fill_enable_changed = - polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable; - const bool line_enable_changed = - polygon_offset.line_enable != cur_state.polygon_offset.line_enable; - const bool point_enable_changed = - polygon_offset.point_enable != cur_state.polygon_offset.point_enable; - const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor; - const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units; - const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp; - - if (fill_enable_changed) { - if (polygon_offset.fill_enable) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - } - - if (line_enable_changed) { - if (polygon_offset.line_enable) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - } - - if (point_enable_changed) { - if (polygon_offset.point_enable) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - } - - if (factor_changed || units_changed || clamp_changed) { + Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, + polygon_offset.fill_enable); + Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, + polygon_offset.line_enable); + Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable, + polygon_offset.point_enable); + + if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units, + cur_state.polygon_offset.clamp), + std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) { if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); } else { - glPolygonOffset(polygon_offset.factor, polygon_offset.units); UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, "Unimplemented Depth polygon offset clamp."); + glPolygonOffset(polygon_offset.factor, polygon_offset.units); } } } @@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const { bool has_delta{}; std::size_t first{}; std::size_t last{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures; + std::array<GLuint, Maxwell::NumTextureSamplers> textures; for (std::size_t i = 0; i < std::size(texture_units); ++i) { const auto& texture_unit = texture_units[i]; - const auto& cur_state_texture_unit = cur_state.texture_units[i]; + auto& cur_state_texture_unit = cur_state.texture_units[i]; textures[i] = texture_unit.texture; - - if (textures[i] != cur_state_texture_unit.texture) { - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (cur_state_texture_unit.texture == textures[i]) + continue; + cur_state_texture_unit.texture = textures[i]; + if (!has_delta) { + first = i; + has_delta = true; } + last = i; } - if (has_delta) { glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), textures.data() + first); @@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const { bool has_delta{}; std::size_t first{}; std::size_t last{}; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; + std::array<GLuint, Maxwell::NumTextureSamplers> samplers; + for (std::size_t i = 0; i < std::size(samplers); ++i) { + if (cur_state.texture_units[i].sampler == texture_units[i].sampler) + continue; + cur_state.texture_units[i].sampler = texture_units[i].sampler; samplers[i] = texture_units[i].sampler; - if (samplers[i] != cur_state.texture_units[i].sampler) { - if (!has_delta) { - first = i; - has_delta = true; - } - last = i; + if (!has_delta) { + first = i; + has_delta = true; } + last = i; } if (has_delta) { glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), @@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const { } } -void OpenGLState::ApplyFramebufferState() const { - if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); - } - if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); - } -} - -void OpenGLState::ApplyVertexArrayState() const { - if (draw.vertex_array != cur_state.draw.vertex_array) { - glBindVertexArray(draw.vertex_array); - } -} - -void OpenGLState::ApplyDepthClamp() const { - if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && - depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { - return; - } - UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, - "Unimplemented Depth Clamp Separation!"); - - if (depth_clamp.far_plane || depth_clamp.near_plane) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } -} - void OpenGLState::Apply() const { ApplyFramebufferState(); ApplyVertexArrayState(); - - // Shader program - if (draw.shader_program != cur_state.draw.shader_program) { - glUseProgram(draw.shader_program); - } - - // Program pipeline - if (draw.program_pipeline != cur_state.draw.program_pipeline) { - glBindProgramPipeline(draw.program_pipeline); - } - // Clip distance - for (std::size_t i = 0; i < clip_distance.size(); ++i) { - if (clip_distance[i] != cur_state.clip_distance[i]) { - if (clip_distance[i]) { - glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); - } else { - glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); - } - } - } - // Point - if (point.size != cur_state.point.size) { - glPointSize(point.size); - } - if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) { - glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, - fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); - } - if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) { - if (multisample_control.alpha_to_coverage) { - glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE); - } else { - glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE); - } - } - if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) { - if (multisample_control.alpha_to_one) { - glEnable(GL_SAMPLE_ALPHA_TO_ONE); - } else { - glDisable(GL_SAMPLE_ALPHA_TO_ONE); - } - } + ApplyShaderProgram(); + ApplyProgramPipeline(); + ApplyClipDistances(); + ApplyPointSize(); + ApplyFragmentColorClamp(); + ApplyMultisample(); ApplyDepthClamp(); ApplyColorMask(); ApplyViewport(); @@ -574,7 +531,28 @@ void OpenGLState::Apply() const { ApplyTextures(); ApplySamplers(); ApplyPolygonOffset(); - cur_state = *this; +} + +void OpenGLState::EmulateViewportWithScissor() { + auto& current = viewports[0]; + if (current.scissor.enabled) { + const GLint left = std::max(current.x, current.scissor.x); + const GLint right = + std::max(current.x + current.width, current.scissor.x + current.scissor.width); + const GLint bottom = std::max(current.y, current.scissor.y); + const GLint top = + std::max(current.y + current.height, current.scissor.y + current.scissor.height); + current.scissor.x = std::max(left, 0); + current.scissor.y = std::max(bottom, 0); + current.scissor.width = std::max(right - left, 0); + current.scissor.height = std::max(top - bottom, 0); + } else { + current.scissor.enabled = true; + current.scissor.x = current.x; + current.scissor.y = current.y; + current.scissor.width = current.width; + current.scissor.height = current.height; + } } OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 9e1eda5b1..41418a7b8 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -54,10 +54,6 @@ public: } depth_clamp; // GL_DEPTH_CLAMP struct { - bool enabled; // viewports arrays are only supported when geometry shaders are enabled. - } geometry_shaders; - - struct { bool enabled; // GL_CULL_FACE GLenum mode; // GL_CULL_FACE_MODE GLenum front_face; // GL_FRONT_FACE @@ -184,34 +180,26 @@ public: static OpenGLState GetCurState() { return cur_state; } + static bool GetsRGBUsed() { return s_rgb_used; } + static void ClearsRGBUsed() { s_rgb_used = false; } + /// Apply this state as the current OpenGL state void Apply() const; - /// Apply only the state affecting the framebuffer + void ApplyFramebufferState() const; - /// Apply only the state affecting the vertex array void ApplyVertexArrayState() const; - /// Set the initial OpenGL state - static void ApplyDefaultState(); - /// Resets any references to the given resource - OpenGLState& UnbindTexture(GLuint handle); - OpenGLState& ResetSampler(GLuint handle); - OpenGLState& ResetProgram(GLuint handle); - OpenGLState& ResetPipeline(GLuint handle); - OpenGLState& ResetVertexArray(GLuint handle); - OpenGLState& ResetFramebuffer(GLuint handle); - void EmulateViewportWithScissor(); - -private: - static OpenGLState cur_state; - // Workaround for sRGB problems caused by - // QT not supporting srgb output - static bool s_rgb_used; + void ApplyShaderProgram() const; + void ApplyProgramPipeline() const; + void ApplyClipDistances() const; + void ApplyPointSize() const; + void ApplyFragmentColorClamp() const; + void ApplyMultisample() const; void ApplySRgb() const; void ApplyCulling() const; void ApplyColorMask() const; @@ -227,6 +215,26 @@ private: void ApplySamplers() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; + + /// Set the initial OpenGL state + static void ApplyDefaultState(); + + /// Resets any references to the given resource + OpenGLState& UnbindTexture(GLuint handle); + OpenGLState& ResetSampler(GLuint handle); + OpenGLState& ResetProgram(GLuint handle); + OpenGLState& ResetPipeline(GLuint handle); + OpenGLState& ResetVertexArray(GLuint handle); + OpenGLState& ResetFramebuffer(GLuint handle); + + /// Viewport does not affects glClearBuffer so emulate viewport using scissor test + void EmulateViewportWithScissor(); + +private: + static OpenGLState cur_state; + + // Workaround for sRGB problems caused by QT not supporting srgb output + static bool s_rgb_used; }; } // namespace OpenGL diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a99ae19bf..a775b402b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -7,7 +7,9 @@ #include <fmt/format.h> #include "common/assert.h" +#include "common/bit_field.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" @@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::TEX: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); } const TextureType texture_type{instr.tex.texture_type}; const bool is_array = instr.tex.array != 0; + const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); const auto process_mode = instr.tex.GetTextureProcessMode(); WriteTexInstructionFloat( - bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); + bb, instr, + GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); break; } case OpCode::Id::TEXS: { @@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } case OpCode::Id::TLD4: { ASSERT(instr.tld4.array == 0); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), "NDV is not implemented"); UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), @@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array)); + const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); + WriteTexInstructionFloat( + bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); break; } case OpCode::Id::TLD4S: { @@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); SetTemporal(bb, indexer++, value); @@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { for (u32 element = 0; element < 2; ++element) { auto params = coords; - MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporal(bb, element, value); } @@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset) { + Node array, Node depth_compare, u32 bias_offset, + std::vector<Node> aoffi) { const bool is_array = array; const bool is_shadow = depth_compare; @@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; + MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); + TextureProcessMode process_mode, bool depth_compare, bool is_array, + bool is_aoffi) { + const bool lod_bias_enabled{ + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; + + u64 parameter_register = instr.gpr20.Value(); + if (lod_bias_enabled) { + ++parameter_register; + } const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); @@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, const Node array = is_array ? GetRegister(array_register) : nullptr; + std::vector<Node> aoffi; + if (is_aoffi) { + aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); + } + Node dc{}; if (depth_compare) { // Depth is always stored in the register signaled by gpr20 or in the next register if lod // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); + dc = GetRegister(parameter_register++); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); } Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, @@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, dc = GetRegister(depth_register); } - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array) { + bool is_array, bool is_aoffi) { const std::size_t coord_count = GetCoordCount(texture_type); const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); @@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de const u64 coord_register = array_register + (is_array ? 1 : 0); std::vector<Node> coords; - for (size_t i = 0; i < coord_count; ++i) + for (std::size_t i = 0; i < coord_count; ++i) { coords.push_back(GetRegister(coord_register + i)); + } + + u64 parameter_register = instr.gpr20.Value(); + std::vector<Node> aoffi; + if (is_aoffi) { + aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); + } + + Node dc{}; + if (depth_compare) { + dc = GetRegister(parameter_register++); + } const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; + MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; @@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( return {coord_count, total_coord_count}; } -} // namespace VideoCommon::Shader
\ No newline at end of file +std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, + bool is_tld4) { + const auto [coord_offsets, size, wrap_value, + diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { + if (is_tld4) { + return {{0, 8, 16}, 6, 32, 64}; + } else { + return {{0, 4, 8}, 4, 8, 16}; + } + }(); + const u32 mask = (1U << size) - 1; + + std::vector<Node> aoffi; + aoffi.reserve(coord_count); + + const auto aoffi_immediate{ + TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; + if (!aoffi_immediate) { + // Variable access, not supported on AMD. + LOG_WARNING(HW_GPU, + "AOFFI constant folding failed, some hardware might have graphical issues"); + for (std::size_t coord = 0; coord < coord_count; ++coord) { + const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); + const Node condition = + Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); + const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); + aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); + } + return aoffi; + } + + for (std::size_t coord = 0; coord < coord_count; ++coord) { + s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; + if (value >= wrap_value) { + value -= diff_value; + } + aoffi.push_back(Immediate(value)); + } + return aoffi; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 5bc3a3900..4888998d3 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -7,6 +7,7 @@ #include <array> #include <cstring> #include <map> +#include <optional> #include <set> #include <string> #include <tuple> @@ -290,6 +291,7 @@ struct MetaTexture { const Sampler& sampler; Node array{}; Node depth_compare{}; + std::vector<Node> aoffi; Node bias{}; Node lod{}; Node component{}; @@ -741,14 +743,14 @@ private: Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, - bool is_array); + bool is_array, bool is_aoffi); Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, bool is_array); Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array); + bool depth_compare, bool is_array, bool is_aoffi); Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, bool is_array); @@ -757,9 +759,11 @@ private: Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); + std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); + Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset); + Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, u64 byte_height); @@ -773,6 +777,8 @@ private: Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); + std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); template <typename... T> diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 33b071747..4505667ff 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -6,6 +6,7 @@ #include <utility> #include <variant> +#include "common/common_types.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -14,7 +15,7 @@ namespace { std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, OperationCode operation_code) { for (; cursor >= 0; --cursor) { - const Node node = code[cursor]; + const Node node = code.at(cursor); if (const auto operation = std::get_if<OperationNode>(node)) { if (operation->GetCode() == operation_code) return {node, cursor}; @@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { return nullptr; } +std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register + // that it uses as operand + const auto [found, found_cursor] = + TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + if (!found) { + return {}; + } + if (const auto immediate = std::get_if<ImmediateNode>(found)) { + return immediate->GetValue(); + } + return {}; +} + std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) { for (; cursor >= 0; --cursor) { |