summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/core/file_sys/system_archive/ng_word.cpp 41
-rw-r--r-- src/core/file_sys/system_archive/ng_word.h 1
-rw-r--r-- src/core/file_sys/system_archive/system_archive.cpp 7
-rw-r--r-- src/core/hle/kernel/object.cpp 2
-rw-r--r-- src/core/hle/kernel/process.cpp 42
-rw-r--r-- src/core/hle/kernel/process.h 30
-rw-r--r-- src/core/hle/kernel/readable_event.cpp 12
-rw-r--r-- src/core/hle/kernel/readable_event.h 11
-rw-r--r-- src/core/hle/kernel/svc.cpp 15
-rw-r--r-- src/core/hle/service/ldr/ldr.cpp 10
-rw-r--r-- src/core/memory.cpp 8
-rw-r--r-- src/video_core/engines/shader_bytecode.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 116
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 70
15 files changed, 292 insertions, 82 deletions
diff --git a/src/core/file_sys/system_archive/ng_word.cpp b/src/core/file_sys/system_archive/ng_word.cpp
index d0acdbd49..f4443784d 100644
--- a/src/core/file_sys/system_archive/ng_word.cpp
+++ b/src/core/file_sys/system_archive/ng_word.cpp
@@ -26,7 +26,7 @@ constexpr std::array<u8, 30> WORD_TXT{
VirtualDir NgWord1() {
std::vector<VirtualFile> files(NgWord1Data::NUMBER_WORD_TXT_FILES);
- for (std::size_t i = 0; i < NgWord1Data::NUMBER_WORD_TXT_FILES; ++i) {
+ for (std::size_t i = 0; i < files.size(); ++i) {
files[i] = std::make_shared<ArrayVfsFile<NgWord1Data::WORD_TXT.size()>>(
NgWord1Data::WORD_TXT, fmt::format("{}.txt", i));
}
@@ -39,4 +39,43 @@ VirtualDir NgWord1() {
return std::make_shared<VectorVfsDirectory>(files, std::vector<VirtualDir>{}, "data");
}
+namespace NgWord2Data {
+
+constexpr std::size_t NUMBER_AC_NX_FILES = 0x10; // count of numbered "ac_{i}_*" file groups that NgWord2() emits
+
+// Should this archive replacement mysteriously not work on a future game, consider updating.
+constexpr std::array<u8, 4> VERSION_DAT{0x0, 0x0, 0x0, 0x15}; // 5.1.0 System Version
+
+constexpr std::array<u8, 0x2C> AC_NX_DATA{ // one shared payload used for every ac_*_nx file; presumably a gzip stream (starts with the 0x1F 0x8B magic) - TODO confirm
+ 0x1F, 0x8B, 0x08, 0x08, 0xD5, 0x2C, 0x09, 0x5C, 0x04, 0x00, 0x61, 0x63, 0x72, 0x61, 0x77,
+ 0x00, 0xED, 0xC1, 0x01, 0x0D, 0x00, 0x00, 0x00, 0xC2, 0x20, 0xFB, 0xA7, 0xB6, 0xC7, 0x07,
+ 0x0C, 0x00, 0x00, 0x00, 0xC8, 0x3B, 0x11, 0x00, 0x1C, 0xC7, 0x00, 0x10, 0x00, 0x00,
+}; // Deserializes to no bad words
+
+} // namespace NgWord2Data
+
+VirtualDir NgWord2() { // Builds the synthesized "data" directory returned for the NgWord2 system archive.
+ std::vector<VirtualFile> files(NgWord2Data::NUMBER_AC_NX_FILES * 3); // pre-sized: three slots (b1, b2, not_b) per index i
+
+ for (std::size_t i = 0; i < NgWord2Data::NUMBER_AC_NX_FILES; ++i) {
+ files[3 * i] = std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>(
+ NgWord2Data::AC_NX_DATA, fmt::format("ac_{}_b1_nx", i));
+ files[3 * i + 1] = std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>(
+ NgWord2Data::AC_NX_DATA, fmt::format("ac_{}_b2_nx", i));
+ files[3 * i + 2] = std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>(
+ NgWord2Data::AC_NX_DATA, fmt::format("ac_{}_not_b_nx", i));
+ }
+
+ files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>( // appended after the pre-sized slots: three "common" files, then version.dat
+ NgWord2Data::AC_NX_DATA, "ac_common_b1_nx"));
+ files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>(
+ NgWord2Data::AC_NX_DATA, "ac_common_b2_nx"));
+ files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::AC_NX_DATA.size()>>(
+ NgWord2Data::AC_NX_DATA, "ac_common_not_b_nx"));
+ files.push_back(std::make_shared<ArrayVfsFile<NgWord2Data::VERSION_DAT.size()>>(
+ NgWord2Data::VERSION_DAT, "version.dat"));
+
+ return std::make_shared<VectorVfsDirectory>(files, std::vector<VirtualDir>{}, "data"); // flat directory: no subdirectories
+}
+
} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/ng_word.h b/src/core/file_sys/system_archive/ng_word.h
index f4bc67344..cd81e0abb 100644
--- a/src/core/file_sys/system_archive/ng_word.h
+++ b/src/core/file_sys/system_archive/ng_word.h
@@ -9,5 +9,6 @@
namespace FileSys::SystemArchive {
VirtualDir NgWord1();
+VirtualDir NgWord2();
} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/system_archive.cpp b/src/core/file_sys/system_archive/system_archive.cpp
index c9c40a07d..e3e79f40a 100644
--- a/src/core/file_sys/system_archive/system_archive.cpp
+++ b/src/core/file_sys/system_archive/system_archive.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <functional>
#include "common/logging/log.h"
#include "core/file_sys/romfs.h"
#include "core/file_sys/system_archive/ng_word.h"
@@ -13,7 +12,7 @@ namespace FileSys::SystemArchive {
constexpr u64 SYSTEM_ARCHIVE_BASE_TITLE_ID = 0x0100000000000800;
constexpr std::size_t SYSTEM_ARCHIVE_COUNT = 0x28;
-using SystemArchiveSupplier = std::function<VirtualDir()>;
+using SystemArchiveSupplier = VirtualDir (*)();
struct SystemArchiveDescriptor {
u64 title_id;
@@ -21,7 +20,7 @@ struct SystemArchiveDescriptor {
SystemArchiveSupplier supplier;
};
-const std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES = {{
+constexpr std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES{{
{0x0100000000000800, "CertStore", nullptr},
{0x0100000000000801, "ErrorMessage", nullptr},
{0x0100000000000802, "MiiModel", nullptr},
@@ -57,7 +56,7 @@ const std::array<SystemArchiveDescriptor, SYSTEM_ARCHIVE_COUNT> SYSTEM_ARCHIVES
{0x0100000000000820, "PlatformConfigCopper", nullptr},
{0x0100000000000821, "PlatformConfigHoag", nullptr},
{0x0100000000000822, "ControllerFirmware", nullptr},
- {0x0100000000000823, "NgWord2", nullptr},
+ {0x0100000000000823, "NgWord2", &NgWord2},
{0x0100000000000824, "PlatformConfigIcosaMariko", nullptr},
{0x0100000000000825, "ApplicationBlackList", nullptr},
{0x0100000000000826, "RebootlessSystemUpdateVersion", nullptr},
diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp
index bb1b68778..0ea851a74 100644
--- a/src/core/hle/kernel/object.cpp
+++ b/src/core/hle/kernel/object.cpp
@@ -15,6 +15,7 @@ bool Object::IsWaitable() const {
switch (GetHandleType()) {
case HandleType::ReadableEvent:
case HandleType::Thread:
+ case HandleType::Process:
case HandleType::Timer:
case HandleType::ServerPort:
case HandleType::ServerSession:
@@ -23,7 +24,6 @@ bool Object::IsWaitable() const {
case HandleType::Unknown:
case HandleType::WritableEvent:
case HandleType::SharedMemory:
- case HandleType::Process:
case HandleType::AddressArbiter:
case HandleType::ResourceLimit:
case HandleType::ClientPort:
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 4ecb8c926..211bf6686 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -9,6 +9,7 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/file_sys/program_metadata.h"
+#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
@@ -48,6 +49,21 @@ SharedPtr<ResourceLimit> Process::GetResourceLimit() const {
return resource_limit;
}
+ResultCode Process::ClearSignalState() { // Resets is_signaled to false; returns ERR_INVALID_STATE if the process has exited or is not signaled.
+ if (status == ProcessStatus::Exited) { // a terminated process may not have its signal cleared
+ LOG_ERROR(Kernel, "called on a terminated process instance.");
+ return ERR_INVALID_STATE;
+ }
+
+ if (!is_signaled) { // nothing to clear; reported as an error rather than silently succeeding
+ LOG_ERROR(Kernel, "called on a process instance that isn't signaled.");
+ return ERR_INVALID_STATE;
+ }
+
+ is_signaled = false;
+ return RESULT_SUCCESS;
+}
+
void Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
program_id = metadata.GetTitleID();
is_64bit_process = metadata.Is64BitProgram();
@@ -137,13 +153,13 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u32 stack_size) {
.Unwrap();
vm_manager.LogLayout();
- status = ProcessStatus::Running;
+ ChangeStatus(ProcessStatus::Running);
Kernel::SetupMainThread(kernel, entry_point, main_thread_priority, *this);
}
void Process::PrepareForTermination() {
- status = ProcessStatus::Exited;
+ ChangeStatus(ProcessStatus::Exiting);
const auto stop_threads = [this](const std::vector<SharedPtr<Thread>>& thread_list) {
for (auto& thread : thread_list) {
@@ -167,6 +183,8 @@ void Process::PrepareForTermination() {
stop_threads(system.Scheduler(1).GetThreadList());
stop_threads(system.Scheduler(2).GetThreadList());
stop_threads(system.Scheduler(3).GetThreadList());
+
+ ChangeStatus(ProcessStatus::Exited);
}
/**
@@ -265,7 +283,25 @@ ResultCode Process::UnmapMemory(VAddr dst_addr, VAddr /*src_addr*/, u64 size) {
return vm_manager.UnmapRange(dst_addr, size);
}
-Kernel::Process::Process(KernelCore& kernel) : Object{kernel} {}
+Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {}
Kernel::Process::~Process() {}
+void Process::Acquire(Thread* thread) { // Only asserts that the process is available; it does not modify is_signaled.
+ ASSERT_MSG(!ShouldWait(thread), "Object unavailable!");
+}
+
+bool Process::ShouldWait(Thread* thread) const { // `thread` is unused here; the result depends only on is_signaled.
+ return !is_signaled;
+}
+
+void Process::ChangeStatus(ProcessStatus new_status) { // Stores the new status and signals waiters, but only on an actual change.
+ if (status == new_status) { // no transition, so no signal is raised
+ return;
+ }
+
+ status = new_status;
+ is_signaled = true; // ClearSignalState() is what resets this back to false
+ WakeupAllWaitingThreads();
+}
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 49345aa66..bcb9ac4b8 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -14,9 +14,10 @@
#include "common/bit_field.h"
#include "common/common_types.h"
#include "core/hle/kernel/handle_table.h"
-#include "core/hle/kernel/object.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/vm_manager.h"
+#include "core/hle/kernel/wait_object.h"
+#include "core/hle/result.h"
namespace FileSys {
class ProgramMetadata;
@@ -117,7 +118,7 @@ struct CodeSet final {
VAddr entrypoint = 0;
};
-class Process final : public Object {
+class Process final : public WaitObject {
public:
static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4;
@@ -212,6 +213,16 @@ public:
return random_entropy.at(index);
}
+ /// Clears the signaled state of the process if and only if it's signaled.
+ ///
+ /// @pre The process must not be already terminated. If this is called on a
+ /// terminated process, then ERR_INVALID_STATE will be returned.
+ ///
+ /// @pre The process must be in a signaled state. If this is called on a
+ /// process instance that is not signaled, ERR_INVALID_STATE will be
+ /// returned.
+ ResultCode ClearSignalState();
+
/**
* Loads process-specifics configuration info with metadata provided
* by an executable.
@@ -260,6 +271,17 @@ private:
explicit Process(KernelCore& kernel);
~Process() override;
+ /// Checks if the specified thread should wait until this process is available.
+ bool ShouldWait(Thread* thread) const override;
+
+ /// Acquires/locks this process for the specified thread if it's available.
+ void Acquire(Thread* thread) override;
+
+ /// Changes the process status. If the status is different
+ /// from the current process status, then this will trigger
+ /// a process signal.
+ void ChangeStatus(ProcessStatus new_status);
+
/// Memory manager for this process.
Kernel::VMManager vm_manager;
@@ -305,6 +327,10 @@ private:
/// specified by metadata provided to the process during loading.
bool is_64bit_process = true;
+ /// Whether or not this process is signaled. This occurs
+ /// upon the process changing to a different state.
+ bool is_signaled = false;
+
/// Total running time for the process in ticks.
u64 total_process_running_time_ticks = 0;
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 92e16b4e6..ba01f495c 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -4,10 +4,10 @@
#include <algorithm>
#include "common/assert.h"
+#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/readable_event.h"
#include "core/hle/kernel/thread.h"
-#include "core/hle/kernel/writable_event.h"
namespace Kernel {
@@ -34,6 +34,16 @@ void ReadableEvent::Clear() {
signaled = false;
}
+ResultCode ReadableEvent::Reset() { // Like Clear(), but returns ERR_INVALID_STATE when the event is not currently signaled.
+ if (!signaled) {
+ return ERR_INVALID_STATE;
+ }
+
+ Clear();
+
+ return RESULT_SUCCESS;
+}
+
void ReadableEvent::WakeupAllWaitingThreads() {
WaitObject::WakeupAllWaitingThreads();
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h
index 867ff3051..80b3b0aba 100644
--- a/src/core/hle/kernel/readable_event.h
+++ b/src/core/hle/kernel/readable_event.h
@@ -7,6 +7,8 @@
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/wait_object.h"
+union ResultCode;
+
namespace Kernel {
class KernelCore;
@@ -39,8 +41,17 @@ public:
void WakeupAllWaitingThreads() override;
+ /// Unconditionally clears the readable event's state.
void Clear();
+ /// Clears the readable event's state if and only if it
+ /// has already been signaled.
+ ///
+ /// @pre The event must be in a signaled state. If this event
+ /// is in an unsignaled state and this function is called,
+ /// then ERR_INVALID_STATE will be returned.
+ ResultCode Reset();
+
private:
explicit ReadableEvent(KernelCore& kernel);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index e6c77f9db..84df2040e 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1433,17 +1433,24 @@ static ResultCode CloseHandle(Handle handle) {
return handle_table.Close(handle);
}
-/// Reset an event
+/// Clears the signaled state of an event or process. Returns ERR_INVALID_HANDLE if the handle is neither.
static ResultCode ResetSignal(Handle handle) {
LOG_DEBUG(Kernel_SVC, "called handle 0x{:08X}", handle);
const auto& handle_table = Core::CurrentProcess()->GetHandleTable();
+
auto event = handle_table.Get<ReadableEvent>(handle);
+ if (event) { // readable event: the result of Reset() is propagated to the caller
+ return event->Reset();
+ }
- ASSERT(event != nullptr);
+ auto process = handle_table.Get<Process>(handle); // not a readable event, so try the handle as a process
+ if (process) {
+ return process->ClearSignalState();
+ }
- event->Clear();
- return RESULT_SUCCESS;
+ LOG_ERROR(Kernel_SVC, "Invalid handle (0x{:08X})", handle);
+ return ERR_INVALID_HANDLE;
}
/// Creates a TransferMemory object
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index ca119dd3a..453d90a22 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -335,10 +335,7 @@ public:
vm_manager.ReprotectRange(*map_address + header.rw_offset, header.rw_size,
Kernel::VMAPermission::ReadWrite);
- Core::System::GetInstance().ArmInterface(0).ClearInstructionCache();
- Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
- Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
- Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
+ Core::System::GetInstance().InvalidateCpuInstructionCaches();
nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size});
@@ -391,10 +388,7 @@ public:
Kernel::MemoryState::ModuleCodeStatic) == RESULT_SUCCESS);
ASSERT(process->UnmapMemory(mapped_addr, 0, nro_size) == RESULT_SUCCESS);
- Core::System::GetInstance().ArmInterface(0).ClearInstructionCache();
- Core::System::GetInstance().ArmInterface(1).ClearInstructionCache();
- Core::System::GetInstance().ArmInterface(2).ClearInstructionCache();
- Core::System::GetInstance().ArmInterface(3).ClearInstructionCache();
+ Core::System::GetInstance().InvalidateCpuInstructionCaches();
nro.erase(iter);
IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 70abd856a..41fd2a6a0 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -53,6 +53,14 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
pointers.resize(num_page_table_entries);
attributes.resize(num_page_table_entries);
+
+ // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
+ // vector size is subsequently decreased (via resize), the vector might not automatically
+ // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
+ // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
+
+ pointers.shrink_to_fit();
+ attributes.shrink_to_fit();
}
static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index b9faaf8e0..5ea094e64 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1049,6 +1049,7 @@ union Instruction {
BitField<49, 1, u64> nodep_flag;
BitField<50, 3, u64> component_mask_selector;
BitField<53, 4, u64> texture_info;
+ BitField<60, 1, u64> fp32_flag;
TextureType GetTextureType() const {
// The TEXS instruction has a weird encoding for the texture type.
@@ -1549,7 +1550,7 @@ private:
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
- INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
+ INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9e93bd609..2b29fc45f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -79,6 +79,26 @@ struct DrawParameters {
}
};
+struct FramebufferCacheKey { // Describes one framebuffer attachment configuration; operator< lets it serve as a std::map key.
+ bool is_single_buffer = false; // true: SetupCachedFramebuffer uses only index 0 of the arrays below (glDrawBuffer path)
+ bool stencil_enable = false; // selects the depth-stencil vs. depth-only attachment point for zeta
+
+ std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{}; // draw-buffer enums handed to glDrawBuffer / glDrawBuffers
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{}; // color texture handles; 0 means none
+ u32 colors_count = 0; // number of entries passed to glDrawBuffers (multi-target path only)
+
+ GLuint zeta = 0; // depth (or depth-stencil) texture handle; 0 means no depth attachment
+
+ auto Tie() const { // tuple of references over every field, so comparison covers the whole key
+ return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count,
+ zeta);
+ }
+
+ bool operator<(const FramebufferCacheKey& rhs) const { // lexicographic order in the field order listed in Tie()
+ return Tie() < rhs.Tie();
+ }
+};
+
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
@@ -90,9 +110,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
OpenGLState::ApplyDefaultState();
- // Create render framebuffer
- framebuffer.Create();
-
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.Apply();
@@ -361,6 +378,44 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SyncClipEnabled(clip_distances);
}
+void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, // Binds the cached framebuffer for fbkey as the draw framebuffer, creating and configuring it on first use.
+ OpenGLState& current_state) {
+ const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); // inserts a default-constructed OGLFramebuffer only when the key is new
+ auto& framebuffer = entry->second;
+
+ if (is_cache_miss)
+ framebuffer.Create();
+
+ current_state.draw.draw_framebuffer = framebuffer.handle;
+ current_state.ApplyFramebufferState(); // applied before the attachment calls below, which all target GL_DRAW_FRAMEBUFFER
+
+ if (!is_cache_miss) // cache hit: attachments were already configured when this entry was created
+ return;
+
+ if (fbkey.is_single_buffer) {
+ if (fbkey.color_attachments[0] != GL_NONE) {
+ glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0],
+ 0);
+ }
+ glDrawBuffer(fbkey.color_attachments[0]); // stays at its zero default when the caller set no color target
+ } else {
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ if (fbkey.colors[index]) { // skip render targets that have no texture
+ glFramebufferTexture(GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+ fbkey.colors[index], 0);
+ }
+ }
+ glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data());
+ }
+
+ if (fbkey.zeta) { // a depth texture is present
+ GLenum zeta_attachment =
+ fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
+ glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0);
+ }
+}
+
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -444,10 +499,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
// Bind the framebuffer surfaces
- current_state.draw.draw_framebuffer = framebuffer.handle;
- current_state.ApplyFramebufferState();
current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
+ FramebufferCacheKey fbkey;
+
if (using_color_fb) {
if (single_color_target) {
// Used when just a single color attachment is enabled, e.g. for clearing a color buffer
@@ -463,14 +518,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
}
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER,
- GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
- color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
- glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+ fbkey.is_single_buffer = true;
+ fbkey.color_attachments[0] =
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
+ fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0;
} else {
// Multiple color attachments are enabled
- std::array<GLenum, Maxwell::NumRenderTargets> buffers;
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
@@ -485,22 +538,17 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
color_surface->GetSurfaceParams().srgb_conversion;
}
- buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
- GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
- 0);
+ fbkey.color_attachments[index] =
+ GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+ fbkey.colors[index] =
+ color_surface != nullptr ? color_surface->Texture().handle : 0;
}
- glDrawBuffers(regs.rt_control.count, buffers.data());
+ fbkey.is_single_buffer = false;
+ fbkey.colors_count = regs.rt_control.count;
}
} else {
- // No color attachments are enabled - zero out all of them
- for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
- GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
- 0, 0);
- }
- glDrawBuffer(GL_NONE);
+ // No color attachments are enabled - leave them as zero
+ fbkey.is_single_buffer = true;
}
if (depth_surface) {
@@ -508,22 +556,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
// the shader doesn't actually write to it.
depth_surface->MarkAsModified(true, res_cache);
- if (regs.stencil_enable) {
- // Attach both depth and stencil
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- } else {
- // Attach depth
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- // Clear stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- }
- } else {
- // Clear both depth and stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
+ fbkey.zeta = depth_surface->Texture().handle;
+ fbkey.stencil_enable = regs.stencil_enable;
}
+
+ SetupCachedFramebuffer(fbkey, current_state);
+
SyncViewport(current_state);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 988fa3e27..8a891ffc7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -40,6 +40,7 @@ namespace OpenGL {
struct ScreenInfo;
struct DrawParameters;
+struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public:
@@ -195,11 +196,12 @@ private:
OGLVertexArray>
vertex_array_cache;
+ std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
+
std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
- OGLFramebuffer framebuffer;
PrimitiveAssembler primitive_assembler{buffer_cache};
GLint uniform_buffer_alignment;
@@ -214,6 +216,8 @@ private:
void SetupShaders(GLenum primitive_mode);
+ void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state);
+
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8d68156bf..4fc09cac6 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -50,6 +50,14 @@ public:
using std::runtime_error::runtime_error;
};
+/// Generates code to use for a swizzle operation: maps elem 0..3 to ".x", ".y", ".z" or ".w".
+static std::string GetSwizzle(u64 elem) {
+ ASSERT(elem <= 3); // "xyzw" only has four selectable components
+ std::string swizzle = ".";
+ swizzle += "xyzw"[elem];
+ return swizzle;
+}
+
/// Translate topology
static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -1004,14 +1012,6 @@ private:
}
}
- /// Generates code to use for a swizzle operation.
- static std::string GetSwizzle(u64 elem) {
- ASSERT(elem <= 3);
- std::string swizzle = ".";
- swizzle += "xyzw"[elem];
- return swizzle;
- }
-
ShaderWriter& shader;
ShaderWriter& declarations;
std::vector<GLSLRegister> regs;
@@ -1343,7 +1343,7 @@ private:
regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
}
- void WriteTexsInstruction(const Instruction& instr, const std::string& texture) {
+ void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -1368,6 +1368,38 @@ private:
}
}
+ void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) {
+ // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
+ // float instruction).
+
+ std::array<std::string, 4> components;
+ u32 written_components = 0;
+
+ for (u32 component = 0; component < 4; ++component) {
+ if (!instr.texs.IsComponentEnabled(component))
+ continue;
+ components[written_components++] = texture + GetSwizzle(component); // enabled components are compacted to the front, in x/y/z/w order
+ }
+ if (written_components == 0) // nothing enabled, so no register is written
+ return;
+
+ const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { // builds a "vec2(low, high)" expression string; a disabled high half becomes "0"
+ return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')';
+ };
+
+ regs.SetRegisterToHalfFloat( // the first one or two enabled components go to gpr0
+ instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1),
+ Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+
+ if (written_components > 2) { // the remaining one or two components go to gpr28
+ ASSERT(instr.texs.HasTwoDestinations());
+ regs.SetRegisterToHalfFloat(
+ instr.gpr28, 0,
+ BuildComponent(components[2], components[3], written_components > 3),
+ Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+ }
+ }
+
static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D:
@@ -2766,24 +2798,27 @@ private:
const bool depth_compare =
instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
const auto process_mode = instr.texs.GetTextureProcessMode();
+
UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
const auto scope = shader.Scope();
- const auto [coord, texture] =
+ auto [coord, texture] =
GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array);
shader.AddLine(coord);
- if (!depth_compare) {
- shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ if (depth_compare) {
+ texture = "vec4(" + texture + ')';
+ }
+ shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ if (instr.texs.fp32_flag) {
+ WriteTexsInstructionFloat(instr, "texture_tmp");
} else {
- shader.AddLine("vec4 texture_tmp = vec4(" + texture + ");");
+ WriteTexsInstructionHalfFloat(instr, "texture_tmp");
}
-
- WriteTexsInstruction(instr, "texture_tmp");
break;
}
case OpCode::Id::TLDS: {
@@ -2842,7 +2877,7 @@ private:
}
}();
- WriteTexsInstruction(instr, texture);
+ WriteTexsInstructionFloat(instr, texture);
break;
}
case OpCode::Id::TLD4: {
@@ -2940,7 +2975,8 @@ private:
if (depth_compare) {
texture = "vec4(" + texture + ')';
}
- WriteTexsInstruction(instr, texture);
+
+ WriteTexsInstructionFloat(instr, texture);
break;
}
case OpCode::Id::TXQ: {