diff options
Diffstat (limited to '')
-rw-r--r-- | src/core/file_sys/system_archive/ng_word.cpp | 41 | ||||
-rw-r--r-- | src/core/file_sys/system_archive/ng_word.h | 1 | ||||
-rw-r--r-- | src/core/file_sys/system_archive/system_archive.cpp | 7 | ||||
-rw-r--r-- | src/core/hle/kernel/object.cpp | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/process.cpp | 42 | ||||
-rw-r--r-- | src/core/hle/kernel/process.h | 30 | ||||
-rw-r--r-- | src/core/hle/kernel/readable_event.cpp | 12 | ||||
-rw-r--r-- | src/core/hle/kernel/readable_event.h | 11 | ||||
-rw-r--r-- | src/core/hle/kernel/svc.cpp | 15 | ||||
-rw-r--r-- | src/core/hle/service/ldr/ldr.cpp | 10 | ||||
-rw-r--r-- | src/core/memory.cpp | 8 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 116 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 70 |
15 files changed, 292 insertions, 82 deletions
diff --git a/src/core/file_sys/system_archive/ng_word.cpp b/src/core/file_sys/system_archive/ng_word.cpp index d0acdbd49..f4443784d 100644 --- a/src/core/file_sys/system_archive/ng_word.cpp +++ b/src/core/file_sys/system_archive/ng_word.cpp @@ -26,7 +26,7 @@ constexpr std::array<u8, 30> WORD_TXT{ VirtualDir NgWord1() { std::vector<VirtualFile> files(NgWord1Data::NUMBER_WORD_TXT_FILES); - for (std::size_t i = 0; i < NgWord1Data::NUMBER_WORD_TXT_FILES; ++i) { + for (std::size_t i = 0; i < files.size(); ++i) { files[i] = std::make_shared<ArrayVfsFile<NgWord1Data::WORD_TXT.size()>>( NgWord1Data::WORD_TXT, fmt::format("{}.txt", i)); } @@ -39,4 +39,43 @@ VirtualDir NgWord1() { return std::make_shared<VectorVfsDirectory>(files, std::vector<VirtualDir>{}, "data"); } +namespace NgWord2Data { + +constexpr std::size_t NUMBER_AC_NX_FILES = 0x10; + +// Should this archive replacement mysteriously not work on a future game, consider updating. +constexpr std::array<u8, 4> VERSION_DAT{0x0, 0x0, 0x0, 0x15}; // 5.1.0 System Version + +constexpr std::array<u8, 0x2C> AC_NX_DATA{ + 0x1F, 0x8B, 0x08, 0x08, 0xD5, 0x2C, 0x09, 0x5C, 0x04, 0x00, 0x61, 0x63, 0x72, 0x61, 0x77, + 0x00, 0xED, 0xC1, 0x01, 0x0D, 0x00, 0x00, 0x00, 0xC2, 0x20, 0xFB, 0xA7, 0xB6, 0xC7, 0x07, + 0x0C, 0x00, 0x00, 0x00, 0xC8, 0x3B, 0x11, 0x00, 0x1C, 0xC7, 0x00, 0x10, 0x00, 0x00, +}; // Deserializes to no bad words + +} // namespace NgWord2Data + +VirtualDir NgWord2() { + std::vector<VirtualFile> files(NgWord2Data::NUMBER_AC_NX_FILES * 3); + + for (std::size_t i = 0; i < NgWord2Data::NUMBER_AC_NX_FILES; ++i) { + files[3 * i] = std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>( + NgWord2Data::AC_NX_DATA, fmt::format("ac_{}_b1_nx", i)); + files[3 * i + 1] = std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>( + NgWord2Data::AC_NX_DATA, fmt::format("ac_{}_b2_nx", i)); + files[3 * i + 2] = std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>( + NgWord2Data::AC_NX_DATA, 
fmt::format("ac_{}_not_b_nx", i)); + } + + files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>( + NgWord2Data::AC_NX_DATA, "ac_common_b1_nx")); + files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>( + NgWord2Data::AC_NX_DATA, "ac_common_b2_nx")); + files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>( + NgWord2Data::AC_NX_DATA, "ac_common_not_b_nx")); + files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::VERSION_DAT.size()>>( + NgWord2Data::VERSION_DAT, "version.dat")); + + return std::make_shared<VectorVfsDirectory>(files, std::vector<VirtualDir>{}, "data"); +} + } // namespace FileSys::SystemArchive diff --git a/src/core/file_sys/system_archive/ng_word.h b/src/core/file_sys/system_archive/ng_word.h index f4bc67344..cd81e0abb 100644 --- a/src/core/file_sys/system_archive/ng_word.h +++ b/src/core/file_sys/system_archive/ng_word.h @@ -9,5 +9,6 @@ namespace FileSys::SystemArchive { VirtualDir NgWord1(); +VirtualDir NgWord2(); } // namespace FileSys::SystemArchive diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp index c9c40a07d..e3e79f40a 100644 --- a/src/core/file_sys/system_archive/system_archive.cpp +++ b/src/core/file_sys/system_archive/system_archive.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <functional> #include "common/logging/log.h" #include "core/file_sys/romfs.h" #include "core/file_sys/system_archive/ng_word.h" @@ -13,7 +12,7 @@ namespace FileSys::SystemArchive { constexpr u64 SYSTEM_ARCHIVE_BASE_TITLE_ID = 0x0100000000000800; constexpr std::size_t SYSTEM_ARCHIVE_COUNT = 0x28; -using SystemArchiveSupplier = std::function<VirtualDir()>; +using SystemArchiveSupplier = VirtualDir (*)(); struct SystemArchiveDescriptor { u64 title_id; @@ -21,7 +20,7 @@ struct SystemArchiveDescriptor { SystemArchiveSupplier supplier; }; -const std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES = {{ +constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES{{ {0x0100000000000800, "CertStore", nullptr}, {0x0100000000000801, "ErrorMessage", nullptr}, {0x0100000000000802, "MiiModel", nullptr}, @@ -57,7 +56,7 @@ const std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES {0x0100000000000820, "PlatformConfigCopper", nullptr}, {0x0100000000000821, "PlatformConfigHoag", nullptr}, {0x0100000000000822, "ControllerFirmware", nullptr}, - {0x0100000000000823, "NgWord2", nullptr}, + {0x0100000000000823, "NgWord2", &NgWord2}, {0x0100000000000824, "PlatformConfigIcosaMariko", nullptr}, {0x0100000000000825, "ApplicationBlackList", nullptr}, {0x0100000000000826, "RebootlessSystemUpdateVersion", nullptr}, diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp index bb1b68778..0ea851a74 100644 --- a/src/core/hle/kernel/object.cpp +++ b/src/core/hle/kernel/object.cpp @@ -15,6 +15,7 @@ bool Object::IsWaitable() const { switch (GetHandleType()) { case HandleType::ReadableEvent: case HandleType::Thread: + case HandleType::Process: case HandleType::Timer: case HandleType::ServerPort: case HandleType::ServerSession: @@ -23,7 +24,6 @@ bool Object::IsWaitable() const { case HandleType::Unknown: case HandleType::WritableEvent: case HandleType::SharedMemory: - case HandleType::Process: case 
HandleType::AddressArbiter: case HandleType::ResourceLimit: case HandleType::ClientPort: diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 4ecb8c926..211bf6686 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -9,6 +9,7 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/file_sys/program_metadata.h" +#include "core/hle/kernel/errors.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" @@ -48,6 +49,21 @@ SharedPtr<ResourceLimit> Process::GetResourceLimit() const { return resource_limit; } +ResultCode Process::ClearSignalState() { + if (status == ProcessStatus::Exited) { + LOG_ERROR(Kernel, "called on a terminated process instance."); + return ERR_INVALID_STATE; + } + + if (!is_signaled) { + LOG_ERROR(Kernel, "called on a process instance that isn't signaled."); + return ERR_INVALID_STATE; + } + + is_signaled = false; + return RESULT_SUCCESS; +} + void Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { program_id = metadata.GetTitleID(); is_64bit_process = metadata.Is64BitProgram(); @@ -137,13 +153,13 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) { .Unwrap(); vm_manager.LogLayout(); - status = ProcessStatus::Running; + ChangeStatus(ProcessStatus::Running); Kernel::SetupMainThread(kernel, entry_point, main_thread_priority, *this); } void Process::PrepareForTermination() { - status = ProcessStatus::Exited; + ChangeStatus(ProcessStatus::Exiting); const auto stop_threads = [this](const std::vector<SharedPtr<Thread>>& thread_list) { for (auto& thread : thread_list) { @@ -167,6 +183,8 @@ void Process::PrepareForTermination() { stop_threads(system.Scheduler(1).GetThreadList()); stop_threads(system.Scheduler(2).GetThreadList()); stop_threads(system.Scheduler(3).GetThreadList()); + + ChangeStatus(ProcessStatus::Exited); } /** @@ -265,7 +283,25 @@ ResultCode 
Process::UnmapMemory(VAddr dst_addr, VAddr /*src_addr*/, u64 size) { return vm_manager.UnmapRange(dst_addr, size); } -Kernel::Process::Process(KernelCore& kernel) : Object{kernel} {} +Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {} Kernel::Process::~Process() {} +void Process::Acquire(Thread* thread) { + ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); +} + +bool Process::ShouldWait(Thread* thread) const { + return !is_signaled; +} + +void Process::ChangeStatus(ProcessStatus new_status) { + if (status == new_status) { + return; + } + + status = new_status; + is_signaled = true; + WakeupAllWaitingThreads(); +} + } // namespace Kernel diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 49345aa66..bcb9ac4b8 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -14,9 +14,10 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "core/hle/kernel/handle_table.h" -#include "core/hle/kernel/object.h" #include "core/hle/kernel/thread.h" #include "core/hle/kernel/vm_manager.h" +#include "core/hle/kernel/wait_object.h" +#include "core/hle/result.h" namespace FileSys { class ProgramMetadata; @@ -117,7 +118,7 @@ struct CodeSet final { VAddr entrypoint = 0; }; -class Process final : public Object { +class Process final : public WaitObject { public: static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; @@ -212,6 +213,16 @@ public: return random_entropy.at(index); } + /// Clears the signaled state of the process if and only if it's signaled. + /// + /// @pre The process must not be already terminated. If this is called on a + /// terminated process, then ERR_INVALID_STATE will be returned. + /// + /// @pre The process must be in a signaled state. If this is called on a + /// process instance that is not signaled, ERR_INVALID_STATE will be + /// returned. 
+ ResultCode ClearSignalState(); + /** * Loads process-specifics configuration info with metadata provided * by an executable. @@ -260,6 +271,17 @@ private: explicit Process(KernelCore& kernel); ~Process() override; + /// Checks if the specified thread should wait until this process is available. + bool ShouldWait(Thread* thread) const override; + + /// Acquires/locks this process for the specified thread if it's available. + void Acquire(Thread* thread) override; + + /// Changes the process status. If the status is different + /// from the current process status, then this will trigger + /// a process signal. + void ChangeStatus(ProcessStatus new_status); + /// Memory manager for this process. Kernel::VMManager vm_manager; @@ -305,6 +327,10 @@ private: /// specified by metadata provided to the process during loading. bool is_64bit_process = true; + /// Whether or not this process is signaled. This occurs + /// upon the process changing to a different state. + bool is_signaled = false; + /// Total running time for the process in ticks. 
u64 total_process_running_time_ticks = 0; diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index 92e16b4e6..ba01f495c 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp @@ -4,10 +4,10 @@ #include <algorithm> #include "common/assert.h" +#include "core/hle/kernel/errors.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/readable_event.h" #include "core/hle/kernel/thread.h" -#include "core/hle/kernel/writable_event.h" namespace Kernel { @@ -34,6 +34,16 @@ void ReadableEvent::Clear() { signaled = false; } +ResultCode ReadableEvent::Reset() { + if (!signaled) { + return ERR_INVALID_STATE; + } + + Clear(); + + return RESULT_SUCCESS; +} + void ReadableEvent::WakeupAllWaitingThreads() { WaitObject::WakeupAllWaitingThreads(); diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h index 867ff3051..80b3b0aba 100644 --- a/src/core/hle/kernel/readable_event.h +++ b/src/core/hle/kernel/readable_event.h @@ -7,6 +7,8 @@ #include "core/hle/kernel/object.h" #include "core/hle/kernel/wait_object.h" +union ResultCode; + namespace Kernel { class KernelCore; @@ -39,8 +41,17 @@ public: void WakeupAllWaitingThreads() override; + /// Unconditionally clears the readable event's state. void Clear(); + /// Clears the readable event's state if and only if it + /// has already been signaled. + /// + /// @pre The event must be in a signaled state. If this event + /// is in an unsignaled state and this function is called, + /// then ERR_INVALID_STATE will be returned. 
+ ResultCode Reset(); + private: explicit ReadableEvent(KernelCore& kernel); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index e6c77f9db..84df2040e 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1433,17 +1433,24 @@ static ResultCode CloseHandle(Handle handle) { return handle_table.Close(handle); } -/// Reset an event +/// Clears the signaled state of an event or process. static ResultCode ResetSignal(Handle handle) { LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle); const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); + auto event = handle_table.Get<ReadableEvent>(handle); + if (event) { + return event->Reset(); + } - ASSERT(event != nullptr); + auto process = handle_table.Get<Process>(handle); + if (process) { + return process->ClearSignalState(); + } - event->Clear(); - return RESULT_SUCCESS; + LOG_ERROR(Kernel_SVC, "Invalid handle (0x{:08X})", handle); + return ERR_INVALID_HANDLE; } /// Creates a TransferMemory object diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index ca119dd3a..453d90a22 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp @@ -335,10 +335,7 @@ public: vm_manager.ReprotectRange(*map_address + header.rw_offset, header.rw_size, Kernel::VMAPermission::ReadWrite); - Core::System::GetInstance().ArmInterface(0).ClearInstructionCache(); - Core::System::GetInstance().ArmInterface(1).ClearInstructionCache(); - Core::System::GetInstance().ArmInterface(2).ClearInstructionCache(); - Core::System::GetInstance().ArmInterface(3).ClearInstructionCache(); + Core::System::GetInstance().InvalidateCpuInstructionCaches(); nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size}); @@ -391,10 +388,7 @@ public: Kernel::MemoryState::ModuleCodeStatic) == RESULT_SUCCESS); ASSERT(process->UnmapMemory(mapped_addr, 0, nro_size) == RESULT_SUCCESS); - 
Core::System::GetInstance().ArmInterface(0).ClearInstructionCache(); - Core::System::GetInstance().ArmInterface(1).ClearInstructionCache(); - Core::System::GetInstance().ArmInterface(2).ClearInstructionCache(); - Core::System::GetInstance().ArmInterface(3).ClearInstructionCache(); + Core::System::GetInstance().InvalidateCpuInstructionCaches(); nro.erase(iter); IPC::ResponseBuilder rb{ctx, 2}; diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 70abd856a..41fd2a6a0 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -53,6 +53,14 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) { pointers.resize(num_page_table_entries); attributes.resize(num_page_table_entries); + + // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the + // vector size is subsequently decreased (via resize), the vector might not automatically + // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for + // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use. + + pointers.shrink_to_fit(); + attributes.shrink_to_fit(); } static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) { diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index b9faaf8e0..5ea094e64 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1049,6 +1049,7 @@ union Instruction { BitField<49, 1, u64> nodep_flag; BitField<50, 3, u64> component_mask_selector; BitField<53, 4, u64> texture_info; + BitField<60, 1, u64> fp32_flag; TextureType GetTextureType() const { // The TEXS instruction has a weird encoding for the texture type. 
@@ -1549,7 +1550,7 @@ private: INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), - INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), + INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9e93bd609..2b29fc45f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -79,6 +79,26 @@ struct DrawParameters { } }; +struct FramebufferCacheKey { + bool is_single_buffer = false; + bool stencil_enable = false; + + std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{}; + std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{}; + u32 colors_count = 0; + + GLuint zeta = 0; + + auto Tie() const { + return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, + zeta); + } + + bool operator<(const FramebufferCacheKey& rhs) const { + return Tie() < rhs.Tie(); + } +}; + RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { @@ -90,9 +110,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo OpenGLState::ApplyDefaultState(); - // Create render framebuffer - framebuffer.Create(); - shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.Apply(); @@ -361,6 +378,44 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SyncClipEnabled(clip_distances); } +void 
RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, + OpenGLState& current_state) { + const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); + auto& framebuffer = entry->second; + + if (is_cache_miss) + framebuffer.Create(); + + current_state.draw.draw_framebuffer = framebuffer.handle; + current_state.ApplyFramebufferState(); + + if (!is_cache_miss) + return; + + if (fbkey.is_single_buffer) { + if (fbkey.color_attachments[0] != GL_NONE) { + glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0], + 0); + } + glDrawBuffer(fbkey.color_attachments[0]); + } else { + for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { + if (fbkey.colors[index]) { + glFramebufferTexture(GL_DRAW_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), + fbkey.colors[index], 0); + } + } + glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data()); + } + + if (fbkey.zeta) { + GLenum zeta_attachment = + fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; + glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0); + } +} + std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; @@ -444,10 +499,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0); // Bind the framebuffer surfaces - current_state.draw.draw_framebuffer = framebuffer.handle; - current_state.ApplyFramebufferState(); current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; + FramebufferCacheKey fbkey; + if (using_color_fb) { if (single_color_target) { // Used when just a single color attachment is enabled, e.g. 
for clearing a color buffer @@ -463,14 +518,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; } - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->Texture().handle : 0, 0); - glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target)); + fbkey.is_single_buffer = true; + fbkey.color_attachments[0] = + GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target); + fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0; } else { // Multiple color attachments are enabled - std::array<GLenum, Maxwell::NumRenderTargets> buffers; for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); @@ -485,22 +538,17 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us color_surface->GetSurfaceParams().srgb_conversion; } - buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), - GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0, - 0); + fbkey.color_attachments[index] = + GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); + fbkey.colors[index] = + color_surface != nullptr ? 
color_surface->Texture().handle : 0; } - glDrawBuffers(regs.rt_control.count, buffers.data()); + fbkey.is_single_buffer = false; + fbkey.colors_count = regs.rt_control.count; } } else { - // No color attachments are enabled - zero out all of them - for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D, - 0, 0); - } - glDrawBuffer(GL_NONE); + // No color attachments are enabled - leave them as zero + fbkey.is_single_buffer = true; } if (depth_surface) { @@ -508,22 +556,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us // the shader doesn't actually write to it. depth_surface->MarkAsModified(true, res_cache); - if (regs.stencil_enable) { - // Attach both depth and stencil - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->Texture().handle, 0); - } else { - // Attach depth - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->Texture().handle, 0); - // Clear stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } - } else { - // Clear both depth and stencil attachment - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); + fbkey.zeta = depth_surface->Texture().handle; + fbkey.stencil_enable = regs.stencil_enable; } + + SetupCachedFramebuffer(fbkey, current_state); + SyncViewport(current_state); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 988fa3e27..8a891ffc7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -40,6 +40,7 @@ namespace OpenGL { struct ScreenInfo; struct DrawParameters; +struct FramebufferCacheKey; class RasterizerOpenGL : public 
VideoCore::RasterizerInterface { public: @@ -195,11 +196,12 @@ private: OGLVertexArray> vertex_array_cache; + std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; + std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - OGLFramebuffer framebuffer; PrimitiveAssembler primitive_assembler{buffer_cache}; GLint uniform_buffer_alignment; @@ -214,6 +216,8 @@ private: void SetupShaders(GLenum primitive_mode); + void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state); + enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 8d68156bf..4fc09cac6 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -50,6 +50,14 @@ public: using std::runtime_error::runtime_error; }; +/// Generates code to use for a swizzle operation. +static std::string GetSwizzle(u64 elem) { + ASSERT(elem <= 3); + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + return swizzle; +} + /// Translate topology static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { @@ -1004,14 +1012,6 @@ private: } } - /// Generates code to use for a swizzle operation. 
- static std::string GetSwizzle(u64 elem) { - ASSERT(elem <= 3); - std::string swizzle = "."; - swizzle += "xyzw"[elem]; - return swizzle; - } - ShaderWriter& shader; ShaderWriter& declarations; std::vector<GLSLRegister> regs; @@ -1343,7 +1343,7 @@ private: regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); } - void WriteTexsInstruction(const Instruction& instr, const std::string& texture) { + void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { // TEXS has two destination registers and a swizzle. The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 @@ -1368,6 +1368,38 @@ private: } } + void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { + // TEXS.F16 destination registers are packed in two registers in pairs (just like any half + // float instruction). + + std::array<std::string, 4> components; + u32 written_components = 0; + + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + components[written_components++] = texture + GetSwizzle(component); + } + if (written_components == 0) + return; + + const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { + return "vec2(" + low + ", " + (high_enabled ? 
high : "0") + ')'; + }; + + regs.SetRegisterToHalfFloat( + instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + + if (written_components > 2) { + ASSERT(instr.texs.HasTwoDestinations()); + regs.SetRegisterToHalfFloat( + instr.gpr28, 0, + BuildComponent(components[2], components[3], written_components > 3), + Tegra::Shader::HalfMerge::H0_H1, 1, 1); + } + } + static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { switch (texture_type) { case Tegra::Shader::TextureType::Texture1D: @@ -2766,24 +2798,27 @@ private: const bool depth_compare = instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); const auto process_mode = instr.texs.GetTextureProcessMode(); + UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), "NODEP is not implemented"); const auto scope = shader.Scope(); - const auto [coord, texture] = + auto [coord, texture] = GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); shader.AddLine(coord); - if (!depth_compare) { - shader.AddLine("vec4 texture_tmp = " + texture + ';'); + if (depth_compare) { + texture = "vec4(" + texture + ')'; + } + shader.AddLine("vec4 texture_tmp = " + texture + ';'); + if (instr.texs.fp32_flag) { + WriteTexsInstructionFloat(instr, "texture_tmp"); } else { - shader.AddLine("vec4 texture_tmp = vec4(" + texture + ");"); + WriteTexsInstructionHalfFloat(instr, "texture_tmp"); } - - WriteTexsInstruction(instr, "texture_tmp"); break; } case OpCode::Id::TLDS: { @@ -2842,7 +2877,7 @@ private: } }(); - WriteTexsInstruction(instr, texture); + WriteTexsInstructionFloat(instr, texture); break; } case OpCode::Id::TLD4: { @@ -2940,7 +2975,8 @@ private: if (depth_compare) { texture = "vec4(" + texture + ')'; } - WriteTexsInstruction(instr, texture); + + WriteTexsInstructionFloat(instr, texture); break; } case OpCode::Id::TXQ: { |