summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt 5
-rw-r--r-- src/video_core/command_processor.cpp 174
-rw-r--r-- src/video_core/command_processor.h 17
-rw-r--r-- src/video_core/engines/fermi_2d.h 2
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 45
-rw-r--r-- src/video_core/engines/kepler_memory.h 90
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 24
-rw-r--r-- src/video_core/engines/maxwell_3d.h 57
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 12
-rw-r--r-- src/video_core/engines/maxwell_dma.h 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 361
-rw-r--r-- src/video_core/engines/shader_header.h 103
-rw-r--r-- src/video_core/gpu.cpp 3
-rw-r--r-- src/video_core/gpu.h 16
-rw-r--r-- src/video_core/macro_interpreter.h 2
-rw-r--r-- src/video_core/rasterizer_interface.h 3
-rw-r--r-- src/video_core/renderer_base.cpp 1
-rw-r--r-- src/video_core/renderer_base.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 93
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 57
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 534
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 57
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 502
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 110
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 24
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h 12
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 837
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 10
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 70
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_util.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 10
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 26
-rw-r--r-- src/video_core/textures/decoders.cpp 53
-rw-r--r-- src/video_core/textures/texture.h 12
37 files changed, 2379 insertions, 962 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index aa5bc3bbe..f5ae57039 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -5,6 +5,8 @@ add_library(video_core STATIC
debug_utils/debug_utils.h
engines/fermi_2d.cpp
engines/fermi_2d.h
+ engines/kepler_memory.cpp
+ engines/kepler_memory.h
engines/maxwell_3d.cpp
engines/maxwell_3d.h
engines/maxwell_compute.cpp
@@ -12,6 +14,7 @@ add_library(video_core STATIC
engines/maxwell_dma.cpp
engines/maxwell_dma.h
engines/shader_bytecode.h
+ engines/shader_header.h
gpu.cpp
gpu.h
macro_interpreter.cpp
@@ -22,6 +25,8 @@ add_library(video_core STATIC
rasterizer_interface.h
renderer_base.cpp
renderer_base.h
+ renderer_opengl/gl_buffer_cache.cpp
+ renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index dc485e811..f1aa6091b 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -14,6 +14,7 @@
#include "core/tracer/recorder.h"
#include "video_core/command_processor.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/engines/maxwell_dma.h"
@@ -28,98 +29,109 @@ enum class BufferMethods {
CountBufferMethods = 0x40,
};
-void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
- LOG_TRACE(HW_GPU,
- "Processing method {:08X} on subchannel {} value "
- "{:08X} remaining params {}",
- method, subchannel, value, remaining_params);
-
- if (method == static_cast<u32>(BufferMethods::BindObject)) {
- // Bind the current subchannel to the desired engine id.
- LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
- bound_engines[subchannel] = static_cast<EngineID>(value);
- return;
- }
+MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
- if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
- // TODO(Subv): Research and implement these methods.
- LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
- return;
- }
+void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
+ MICROPROFILE_SCOPE(ProcessCommandLists);
- ASSERT(bound_engines.find(subchannel) != bound_engines.end());
-
- const EngineID engine = bound_engines[subchannel];
-
- switch (engine) {
- case EngineID::FERMI_TWOD_A:
- fermi_2d->WriteReg(method, value);
- break;
- case EngineID::MAXWELL_B:
- maxwell_3d->WriteReg(method, value, remaining_params);
- break;
- case EngineID::MAXWELL_COMPUTE_B:
- maxwell_compute->WriteReg(method, value);
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->WriteReg(method, value);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine");
- }
-}
+ auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
+ LOG_TRACE(HW_GPU,
+ "Processing method {:08X} on subchannel {} value "
+ "{:08X} remaining params {}",
+ method, subchannel, value, remaining_params);
-void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
- const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
- VAddr current_addr = *head_address;
- while (current_addr < *head_address + size * sizeof(CommandHeader)) {
- const CommandHeader header = {Memory::Read32(current_addr)};
- current_addr += sizeof(u32);
-
- switch (header.mode.Value()) {
- case SubmissionMode::IncreasingOld:
- case SubmissionMode::Increasing: {
- // Increase the method value with each argument.
- for (unsigned i = 0; i < header.arg_count; ++i) {
- WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
- current_addr += sizeof(u32);
- }
- break;
+ ASSERT(subchannel < bound_engines.size());
+
+ if (method == static_cast<u32>(BufferMethods::BindObject)) {
+ // Bind the current subchannel to the desired engine id.
+ LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
+ bound_engines[subchannel] = static_cast<EngineID>(value);
+ return;
}
- case SubmissionMode::NonIncreasingOld:
- case SubmissionMode::NonIncreasing: {
- // Use the same method value for all arguments.
- for (unsigned i = 0; i < header.arg_count; ++i) {
- WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
- current_addr += sizeof(u32);
- }
+
+ if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+ // TODO(Subv): Research and implement these methods.
+ LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+ return;
+ }
+
+ const EngineID engine = bound_engines[subchannel];
+
+ switch (engine) {
+ case EngineID::FERMI_TWOD_A:
+ fermi_2d->WriteReg(method, value);
+ break;
+ case EngineID::MAXWELL_B:
+ maxwell_3d->WriteReg(method, value, remaining_params);
break;
+ case EngineID::MAXWELL_COMPUTE_B:
+ maxwell_compute->WriteReg(method, value);
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ maxwell_dma->WriteReg(method, value);
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ kepler_memory->WriteReg(method, value);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine");
}
- case SubmissionMode::IncreaseOnce: {
- ASSERT(header.arg_count.Value() >= 1);
+ };
- // Use the original method for the first argument and then the next method for all other
- // arguments.
- WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - 1);
+ for (auto entry : commands) {
+ Tegra::GPUVAddr address = entry.Address();
+ u32 size = entry.sz;
+ const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+ VAddr current_addr = *head_address;
+ while (current_addr < *head_address + size * sizeof(CommandHeader)) {
+ const CommandHeader header = {Memory::Read32(current_addr)};
current_addr += sizeof(u32);
- for (unsigned i = 1; i < header.arg_count; ++i) {
- WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
- header.arg_count - i - 1);
+ switch (header.mode.Value()) {
+ case SubmissionMode::IncreasingOld:
+ case SubmissionMode::Increasing: {
+ // Increase the method value with each argument.
+ for (unsigned i = 0; i < header.arg_count; ++i) {
+ WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - i - 1);
+ current_addr += sizeof(u32);
+ }
+ break;
+ }
+ case SubmissionMode::NonIncreasingOld:
+ case SubmissionMode::NonIncreasing: {
+ // Use the same method value for all arguments.
+ for (unsigned i = 0; i < header.arg_count; ++i) {
+ WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - i - 1);
+ current_addr += sizeof(u32);
+ }
+ break;
+ }
+ case SubmissionMode::IncreaseOnce: {
+ ASSERT(header.arg_count.Value() >= 1);
+
+ // Use the original method for the first argument and then the next method for all
+ // other arguments.
+ WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - 1);
current_addr += sizeof(u32);
+
+ for (unsigned i = 1; i < header.arg_count; ++i) {
+ WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
+ header.arg_count - i - 1);
+ current_addr += sizeof(u32);
+ }
+ break;
+ }
+ case SubmissionMode::Inline: {
+ // The register value is stored in the bits 16-28 as an immediate
+ WriteReg(header.method, header.subchannel, header.inline_data, 0);
+ break;
+ }
+ default:
+ UNIMPLEMENTED();
}
- break;
- }
- case SubmissionMode::Inline: {
- // The register value is stored in the bits 16-28 as an immediate
- WriteReg(header.method, header.subchannel, header.inline_data, 0);
- break;
- }
- default:
- UNIMPLEMENTED();
}
}
}
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index a01153e0b..bd766e77a 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
#include <type_traits>
#include "common/bit_field.h"
#include "common/common_types.h"
+#include "video_core/memory_manager.h"
namespace Tegra {
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
IncreaseOnce = 5
};
+struct CommandListHeader { // One 8-byte entry locating a command list: GPU address plus size
+ u32 entry0; // gpu_va_lo: low 32 bits of the command list's GPU virtual address
+ union {
+ u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+ BitField<0, 8, u32> gpu_va_hi; // bits 0-7: address bits 32-39
+ BitField<8, 2, u32> unk1; // bits 8-9: meaning unknown
+ BitField<10, 21, u32> sz; // bits 10-30: size, consumed as a count of CommandHeader words
+ BitField<31, 1, u32> unk2; // bit 31: meaning unknown
+ };
+
+ GPUVAddr Address() const { // Recombines gpu_va_hi and entry0 into the full GPU address
+ return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
+ }
+};
+static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
+
union CommandHeader {
u32 hex;
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index dcf9ef8b9..021b83eaa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -26,7 +26,7 @@ public:
void WriteReg(u32 method, u32 value);
struct Regs {
- static constexpr size_t NUM_REGS = 0x258;
+ static constexpr std::size_t NUM_REGS = 0x258;
struct Surface {
RenderTargetFormat format;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
new file mode 100644
index 000000000..66ae6332d
--- /dev/null
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -0,0 +1,45 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_memory.h"
+
+namespace Tegra::Engines {
+
+KeplerMemory::KeplerMemory(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+KeplerMemory::~KeplerMemory() = default;
+
+void KeplerMemory::WriteReg(u32 method, u32 value) { // Stores value in register `method`, then runs that register's side effect
+ ASSERT_MSG(method < Regs::NUM_REGS,
+ "Invalid KeplerMemory register, increase the size of the Regs structure");
+
+ regs.reg_array[method] = value; // raw store first, so the handlers below read the new value through regs
+
+ switch (method) {
+ case KEPLERMEMORY_REG_INDEX(exec): {
+ state.write_offset = 0; // a write to exec restarts the upload at the first destination word
+ break;
+ }
+ case KEPLERMEMORY_REG_INDEX(data): {
+ ProcessData(value); // each write to data uploads one 32-bit word
+ break;
+ }
+ }
+}
+
+/// Writes one 32-bit word of inline upload data to guest memory. The target is the address
+/// configured in regs.dest plus the number of words already written since the last exec write;
+/// the running word offset is advanced afterwards.
+void KeplerMemory::ProcessData(u32 data) {
+    ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
+    ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
+
+    // write_offset counts 32-bit words, so scale it to a byte offset.
+    const GPUVAddr address = regs.dest.Address() + state.write_offset * sizeof(u32);
+
+    // The translation yields an empty optional for an unmapped GPU address; dereferencing that
+    // would be undefined behaviour, so check it explicitly before writing.
+    const auto dest_address = memory_manager.GpuToCpuAddress(address);
+    ASSERT_MSG(dest_address, "Invalid GPU address");
+
+    Memory::Write32(*dest_address, data);
+
+    state.write_offset++;
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
new file mode 100644
index 000000000..b0d0078cf
--- /dev/null
+++ b/src/video_core/engines/kepler_memory.h
@@ -0,0 +1,90 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Engines {
+
+#define KEPLERMEMORY_REG_INDEX(field_name) \
+ (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
+
+class KeplerMemory final {
+public:
+ KeplerMemory(MemoryManager& memory_manager);
+ ~KeplerMemory();
+
+ /// Write the value to the register identified by method.
+ void WriteReg(u32 method, u32 value);
+
+ struct Regs {
+ static constexpr size_t NUM_REGS = 0x7F;
+
+ union {
+ struct {
+ INSERT_PADDING_WORDS(0x60);
+
+ u32 line_length_in;
+ u32 line_count;
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+ u32 pitch;
+ u32 block_dimensions;
+ u32 width;
+ u32 height;
+ u32 depth;
+ u32 z;
+ u32 x;
+ u32 y;
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } dest;
+
+ struct {
+ union {
+ BitField<0, 1, u32> linear;
+ };
+ } exec;
+
+ u32 data;
+
+ INSERT_PADDING_WORDS(0x11);
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
+
+ struct {
+ u32 write_offset = 0;
+ } state{};
+
+private:
+ MemoryManager& memory_manager;
+
+ void ProcessData(u32 data);
+};
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(line_length_in, 0x60);
+ASSERT_REG_POSITION(line_count, 0x61);
+ASSERT_REG_POSITION(dest, 0x62);
+ASSERT_REG_POSITION(exec, 0x6C);
+ASSERT_REG_POSITION(data, 0x6D);
+#undef ASSERT_REG_POSITION
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 68ff1e86b..8afd26fe9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -5,6 +5,7 @@
#include <cinttypes>
#include "common/assert.h"
#include "core/core.h"
+#include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/engines/maxwell_3d.h"
@@ -134,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
break;
}
- rasterizer.NotifyMaxwellRegisterChanged(method);
-
if (debug_context) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
}
@@ -194,8 +193,8 @@ void Maxwell3D::ProcessQueryGet() {
// wait queues.
LongQueryResult query_result{};
query_result.value = result;
- // TODO(Subv): Generate a real GPU timestamp and write it here instead of 0
- query_result.timestamp = 0;
+ // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
+ query_result.timestamp = CoreTiming::GetTicks();
Memory::WriteBlock(*address, &query_result, sizeof(query_result));
}
break;
@@ -249,8 +248,8 @@ void Maxwell3D::DrawArrays() {
void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
- auto& shader = state.shader_stages[static_cast<size_t>(stage)];
- auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)];
+ auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+ auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];
auto& buffer = shader.const_buffers[bind_data.index];
@@ -292,10 +291,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
- ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
- (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
- "Texture types other than Texture2D are unimplemented");
-
auto r_type = tic_entry.r_type.Value();
auto g_type = tic_entry.g_type.Value();
auto b_type = tic_entry.b_type.Value();
@@ -321,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
std::vector<Texture::FullTextureInfo> textures;
- auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)];
+ auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
// Offset into the texture constbuffer where the texture info begins.
- static constexpr size_t TextureInfoOffset = 0x20;
+ static constexpr std::size_t TextureInfoOffset = 0x20;
for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
@@ -365,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
return textures;
}
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
- auto& shader = state.shader_stages[static_cast<size_t>(stage)];
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
+ std::size_t offset) const {
+ auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 92bfda053..b81b0723d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -34,17 +34,17 @@ public:
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
struct Regs {
- static constexpr size_t NUM_REGS = 0xE00;
-
- static constexpr size_t NumRenderTargets = 8;
- static constexpr size_t NumViewports = 16;
- static constexpr size_t NumCBData = 16;
- static constexpr size_t NumVertexArrays = 32;
- static constexpr size_t NumVertexAttributes = 32;
- static constexpr size_t MaxShaderProgram = 6;
- static constexpr size_t MaxShaderStage = 5;
+ static constexpr std::size_t NUM_REGS = 0xE00;
+
+ static constexpr std::size_t NumRenderTargets = 8;
+ static constexpr std::size_t NumViewports = 16;
+ static constexpr std::size_t NumCBData = 16;
+ static constexpr std::size_t NumVertexArrays = 32;
+ static constexpr std::size_t NumVertexAttributes = 32;
+ static constexpr std::size_t MaxShaderProgram = 6;
+ static constexpr std::size_t MaxShaderStage = 5;
// Maximum number of const buffers per shader stage.
- static constexpr size_t MaxConstBuffers = 18;
+ static constexpr std::size_t MaxConstBuffers = 18;
enum class QueryMode : u32 {
Write = 0,
@@ -127,6 +127,7 @@ public:
BitField<21, 6, Size> size;
BitField<27, 3, Type> type;
BitField<31, 1, u32> bgra;
+ u32 hex;
};
u32 ComponentCount() const {
@@ -262,6 +263,10 @@ public:
bool IsValid() const {
return size != Size::Invalid;
}
+
+ bool operator<(const VertexAttribute& other) const {
+ return hex < other.hex;
+ }
};
enum class PrimitiveTopology : u32 {
@@ -438,9 +443,9 @@ public:
}
};
- bool IsShaderConfigEnabled(size_t index) const {
+ bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
- if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) {
+ if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
return true;
}
return shader_config[index].enable != 0;
@@ -528,7 +533,11 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0x20);
+ INSERT_PADDING_WORDS(0x13);
+
+ u32 rt_separate_frag_data;
+
+ INSERT_PADDING_WORDS(0xC);
struct {
u32 address_high;
@@ -545,14 +554,29 @@ public:
INSERT_PADDING_WORDS(0x5B);
- VertexAttribute vertex_attrib_format[NumVertexAttributes];
+ std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
INSERT_PADDING_WORDS(0xF);
struct {
union {
BitField<0, 4, u32> count;
+ BitField<4, 3, u32> map_0;
+ BitField<7, 3, u32> map_1;
+ BitField<10, 3, u32> map_2;
+ BitField<13, 3, u32> map_3;
+ BitField<16, 3, u32> map_4;
+ BitField<19, 3, u32> map_5;
+ BitField<22, 3, u32> map_6;
+ BitField<25, 3, u32> map_7;
};
+
+ u32 GetMap(std::size_t index) const {
+ const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
+ map_4, map_5, map_6, map_7};
+ ASSERT(index < maps.size());
+ return maps[index];
+ }
} rt_control;
INSERT_PADDING_WORDS(0x2);
@@ -901,7 +925,7 @@ public:
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
/// Returns the texture information for a specific texture in a specific shader stage.
- Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
+ Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
private:
VideoCore::RasterizerInterface& rasterizer;
@@ -963,8 +987,9 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(zeta, 0x3F8);
-ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
+ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 6e740713f..aa7481b8c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -41,7 +41,6 @@ void MaxwellDMA::HandleCopy() {
// TODO(Subv): Perform more research and implement all features of this engine.
ASSERT(regs.exec.enable_swizzle == 0);
- ASSERT(regs.exec.enable_2d == 1);
ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
@@ -51,10 +50,19 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.dst_params.pos_y == 0);
if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
- Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count);
+ std::size_t copy_size = regs.x_count;
+
+ // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
+ // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
+ if (regs.exec.enable_2d) {
+ copy_size = copy_size * regs.y_count;
+ }
+
+ Memory::CopyBlock(dest_cpu, source_cpu, copy_size);
return;
}
+ ASSERT(regs.exec.enable_2d == 1);
u8* src_buffer = Memory::GetPointer(source_cpu);
u8* dst_buffer = Memory::GetPointer(dest_cpu);
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 7882f16e0..311ccb616 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -23,7 +23,7 @@ public:
void WriteReg(u32 method, u32 value);
struct Regs {
- static constexpr size_t NUM_REGS = 0x1D6;
+ static constexpr std::size_t NUM_REGS = 0x1D6;
struct Parameters {
union {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 3e4efbe0c..7e1de0fa1 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -20,10 +20,10 @@ namespace Tegra::Shader {
struct Register {
/// Number of registers
- static constexpr size_t NumRegisters = 256;
+ static constexpr std::size_t NumRegisters = 256;
/// Register 255 is special cased to always be 0
- static constexpr size_t ZeroIndex = 255;
+ static constexpr std::size_t ZeroIndex = 255;
enum class Size : u64 {
Byte = 0,
@@ -67,6 +67,13 @@ private:
u64 value{};
};
+enum class AttributeSize : u64 {
+ Word = 0,
+ DoubleWord = 1,
+ TripleWord = 2,
+ QuadWord = 3,
+};
+
union Attribute {
Attribute() = default;
@@ -76,6 +83,7 @@ union Attribute {
Position = 7,
Attribute_0 = 8,
Attribute_31 = 39,
+ PointCoord = 46,
// This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
// shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
// shader.
@@ -86,9 +94,10 @@ union Attribute {
};
union {
+ BitField<20, 10, u64> immediate;
BitField<22, 2, u64> element;
BitField<24, 6, Index> index;
- BitField<47, 3, u64> size;
+ BitField<47, 3, AttributeSize> size;
} fmt20;
union {
@@ -231,6 +240,41 @@ enum class FlowCondition : u64 {
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
};
+enum class ControlCode : u64 {
+ F = 0,
+ LT = 1,
+ EQ = 2,
+ LE = 3,
+ GT = 4,
+ NE = 5,
+ GE = 6,
+ Num = 7,
+ Nan = 8,
+ LTU = 9,
+ EQU = 10,
+ LEU = 11,
+ GTU = 12,
+ NEU = 13,
+ GEU = 14,
+ //
+ OFF = 16,
+ LO = 17,
+ SFF = 18,
+ LS = 19,
+ HI = 20,
+ SFT = 21,
+ HS = 22,
+ OFT = 23,
+ CSM_TA = 24,
+ CSM_TR = 25,
+ CSM_MX = 26,
+ FCSM_TA = 27,
+ FCSM_TR = 28,
+ FCSM_MX = 29,
+ RLE = 30,
+ RGT = 31,
+};
+
enum class PredicateResultMode : u64 {
None = 0x0,
NotZero = 0x3,
@@ -243,7 +287,47 @@ enum class TextureType : u64 {
TextureCube = 3,
};
-enum class IpaMode : u64 { Pass = 0, None = 1, Constant = 2, Sc = 3 };
+enum class TextureQueryType : u64 {
+ Dimension = 1,
+ TextureType = 2,
+ SamplePosition = 5,
+ Filter = 16,
+ LevelOfDetail = 18,
+ Wrap = 20,
+ BorderColor = 22,
+};
+
+enum class TextureProcessMode : u64 {
+ None = 0,
+ LZ = 1, // Unknown, appears to be the same as none.
+ LB = 2, // Load Bias.
+ LL = 3, // Load LOD (LevelOfDetail)
+ LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
+ LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL
+};
+
+enum class TextureMiscMode : u64 {
+ DC,
+ AOFFI, // Uses Offset
+ NDV,
+ NODEP,
+ MZ,
+ PTP,
+};
+
+enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
+enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
+
+/// Pairs an IpaInterpMode with an IpaSampleMode so both qualifiers can be stored and compared
+/// as a single value.
+struct IpaMode {
+    IpaInterpMode interpolation_mode;
+    IpaSampleMode sampling_mode;
+
+    // The comparisons never modify either operand, so they are const-qualified; without that
+    // they could not be called on const IpaMode objects or through const references.
+    inline bool operator==(const IpaMode& a) const {
+        return (a.interpolation_mode == interpolation_mode) && (a.sampling_mode == sampling_mode);
+    }
+    inline bool operator!=(const IpaMode& a) const {
+        return !((*this) == a);
+    }
+};
union Instruction {
Instruction& operator=(const Instruction& instr) {
@@ -328,10 +412,16 @@ union Instruction {
} alu;
union {
- BitField<54, 3, IpaMode> mode;
+ BitField<51, 1, u64> saturate;
+ BitField<52, 2, IpaSampleMode> sample_mode;
+ BitField<54, 2, IpaInterpMode> interp_mode;
} ipa;
union {
+ BitField<39, 2, u64> tab5cb8_2;
+ BitField<41, 3, u64> tab5c68_1;
+ BitField<44, 2, u64> tab5c68_0;
+ BitField<47, 1, u64> cc;
BitField<48, 1, u64> negate_b;
} fmul;
@@ -395,12 +485,54 @@ union Instruction {
} bfe;
union {
+ BitField<48, 3, u64> pred48;
+
+ union {
+ BitField<20, 20, u64> entry_a;
+ BitField<39, 5, u64> entry_b;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } imm;
+
+ union {
+ BitField<20, 14, u64> cb_index;
+ BitField<34, 5, u64> cb_offset;
+ BitField<56, 1, u64> neg;
+ BitField<57, 1, u64> uses_cc;
+ } hi;
+
+ union {
+ BitField<20, 14, u64> cb_index;
+ BitField<34, 5, u64> cb_offset;
+ BitField<39, 5, u64> entry_a;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } rz;
+
+ union {
+ BitField<39, 5, u64> entry_a;
+ BitField<45, 1, u64> neg;
+ BitField<46, 1, u64> uses_cc;
+ } r1;
+
+ union {
+ BitField<28, 8, u64> entry_a;
+ BitField<37, 1, u64> neg;
+ BitField<38, 1, u64> uses_cc;
+ } r2;
+
+ } lea;
+
+ union {
BitField<0, 5, FlowCondition> cond;
} flow;
union {
+ BitField<47, 1, u64> cc;
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_c;
+ BitField<51, 2, u64> tab5980_1;
+ BitField<53, 2, u64> tab5980_0;
} ffma;
union {
@@ -446,6 +578,27 @@ union Instruction {
} psetp;
union {
+ BitField<12, 3, u64> pred12;
+ BitField<15, 1, u64> neg_pred12;
+ BitField<24, 2, PredOperation> cond;
+ BitField<29, 3, u64> pred29;
+ BitField<32, 1, u64> neg_pred29;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred39;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, PredOperation> op;
+ } pset;
+
+ union {
+ BitField<0, 3, u64> pred0;
+ BitField<3, 3, u64> pred3;
+ BitField<8, 5, ControlCode> cc; // flag in cc
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred39;
+ BitField<45, 4, PredOperation> op; // op with pred39
+ } csetp;
+
+ union {
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
BitField<43, 1, u64> neg_a;
@@ -490,25 +643,127 @@ union Instruction {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
BitField<31, 4, u64> component_mask;
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<54, 1, u64> aoffi_flag;
+ BitField<55, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(size_t component) const {
+ bool IsComponentEnabled(std::size_t component) const {
return ((1ull << component) & component_mask) != 0;
}
+
+ TextureProcessMode GetTextureProcessMode() const {
+ return process_mode;
+ }
+
+ bool UsesMiscMode(TextureMiscMode mode) const {
+ switch (mode) {
+ case TextureMiscMode::DC:
+ return dc_flag != 0;
+ case TextureMiscMode::NODEP:
+ return nodep_flag != 0;
+ case TextureMiscMode::AOFFI:
+ return aoffi_flag != 0;
+ default:
+ break;
+ }
+ return false;
+ }
} tex;
union {
+ BitField<22, 6, TextureQueryType> query_type;
+ BitField<31, 4, u64> component_mask;
+ BitField<49, 1, u64> nodep_flag;
+
+ bool UsesMiscMode(TextureMiscMode mode) const {
+ switch (mode) {
+ case TextureMiscMode::NODEP:
+ return nodep_flag != 0;
+ default:
+ break;
+ }
+ return false;
+ }
+ } txq;
+
+ union {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
+ BitField<31, 4, u64> component_mask;
+ BitField<35, 1, u64> ndv_flag;
+ BitField<49, 1, u64> nodep_flag;
+
+ bool IsComponentEnabled(std::size_t component) const {
+ return ((1ull << component) & component_mask) != 0;
+ }
+
+ bool UsesMiscMode(TextureMiscMode mode) const {
+ switch (mode) {
+ case TextureMiscMode::NDV:
+ return (ndv_flag != 0);
+ case TextureMiscMode::NODEP:
+ return (nodep_flag != 0);
+ default:
+ break;
+ }
+ return false;
+ }
+ } tmml;
+
+ union {
+ BitField<28, 1, u64> array;
+ BitField<29, 2, TextureType> texture_type;
+ BitField<35, 1, u64> ndv_flag;
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<54, 2, u64> info;
BitField<56, 2, u64> component;
+
+ bool UsesMiscMode(TextureMiscMode mode) const {
+ switch (mode) {
+ case TextureMiscMode::NDV:
+ return ndv_flag != 0;
+ case TextureMiscMode::NODEP:
+ return nodep_flag != 0;
+ case TextureMiscMode::DC:
+ return dc_flag != 0;
+ case TextureMiscMode::AOFFI:
+ return info == 1;
+ case TextureMiscMode::PTP:
+ return info == 2;
+ default:
+ break;
+ }
+ return false;
+ }
} tld4;
union {
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<51, 1, u64> aoffi_flag;
BitField<52, 2, u64> component;
+
+ bool UsesMiscMode(TextureMiscMode mode) const {
+ switch (mode) {
+ case TextureMiscMode::DC:
+ return dc_flag != 0;
+ case TextureMiscMode::NODEP:
+ return nodep_flag != 0;
+ case TextureMiscMode::AOFFI:
+ return aoffi_flag != 0;
+ default:
+ break;
+ }
+ return false;
+ }
} tld4s;
union {
BitField<0, 8, Register> gpr0;
BitField<28, 8, Register> gpr28;
+ BitField<49, 1, u64> nodep_flag;
BitField<50, 3, u64> component_mask_selector;
BitField<53, 4, u64> texture_info;
@@ -528,6 +783,37 @@ union Instruction {
UNREACHABLE();
}
+ /// Maps the texture_info encoding to its texture process mode: encodings 0, 2, 6, 8, 9 and
+ /// 11 give LZ, encodings 3, 5 and 13 give LL, and everything else gives None.
+ TextureProcessMode GetTextureProcessMode() const {
+     const u64 encoding = texture_info;
+     const bool is_lz = encoding == 0 || encoding == 2 || encoding == 6 || encoding == 8 ||
+                        encoding == 9 || encoding == 11;
+     if (is_lz) {
+         return TextureProcessMode::LZ;
+     }
+     const bool is_ll = encoding == 3 || encoding == 5 || encoding == 13;
+     if (is_ll) {
+         return TextureProcessMode::LL;
+     }
+     return TextureProcessMode::None;
+ }
+
+ /// Reports whether the given miscellaneous texture mode is requested by this encoding.
+ /// DC is derived from texture_info (encodings 4 through 6, or 9); NODEP comes from its
+ /// one-bit flag. Any other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::DC) {
+         return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     return false;
+ }
+
bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info >= 7 && texture_info <= 9;
@@ -537,7 +823,7 @@ union Instruction {
return gpr28.Value() != Register::ZeroIndex;
}
- bool IsComponentEnabled(size_t component) const {
+ bool IsComponentEnabled(std::size_t component) const {
static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
{},
{0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -545,7 +831,7 @@ union Instruction {
{0x7, 0xb, 0xd, 0xe, 0xf},
}};
- size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
+ std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
u32 mask = mask_lut[index][component_mask_selector];
@@ -556,6 +842,7 @@ union Instruction {
} texs;
union {
+ BitField<49, 1, u64> nodep_flag;
BitField<53, 4, u64> texture_info;
TextureType GetTextureType() const {
@@ -576,6 +863,26 @@ union Instruction {
UNREACHABLE();
}
+ /// Maps the texture_info encoding to its texture process mode: encodings 1, 5 and 12 give
+ /// LL, every other encoding gives LZ.
+ TextureProcessMode GetTextureProcessMode() const {
+     switch (texture_info) {
+     case 1:
+     case 5:
+     case 12:
+         return TextureProcessMode::LL;
+     default:
+         return TextureProcessMode::LZ;
+     }
+ }
+
+ /// Reports whether the given miscellaneous texture mode is requested by this encoding.
+ /// AOFFI and MZ are derived from texture_info (12 or 4 for AOFFI, 5 for MZ); NODEP comes
+ /// from its one-bit flag. Any other mode yields false.
+ bool UsesMiscMode(TextureMiscMode mode) const {
+     if (mode == TextureMiscMode::AOFFI) {
+         return texture_info == 12 || texture_info == 4;
+     }
+     if (mode == TextureMiscMode::MZ) {
+         return texture_info == 5;
+     }
+     if (mode == TextureMiscMode::NODEP) {
+         return nodep_flag != 0;
+     }
+     return false;
+ }
+
bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info == 8;
@@ -618,6 +925,7 @@ union Instruction {
BitField<36, 5, u64> index;
} cbuf36;
+ BitField<47, 1, u64> generates_cc;
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
@@ -647,11 +955,13 @@ public:
LDG, // Load from global memory
STG, // Store in global memory
TEX,
- TEXQ, // Texture Query
- TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
- TLDS, // Texture Load with scalar/non-vec4 source/destinations
- TLD4, // Texture Load 4
- TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
+ TXQ, // Texture Query
+ TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
+ TLDS, // Texture Load with scalar/non-vec4 source/destinations
+ TLD4, // Texture Load 4
+ TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
+ TMML_B, // Texture Mip Map Level
+ TMML, // Texture Mip Map Level
EXIT,
IPA,
FFMA_IMM, // Fused Multiply and Add
@@ -676,6 +986,11 @@ public:
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
+ LEA_R1,
+ LEA_R2,
+ LEA_RZ,
+ LEA_IMM,
+ LEA_HI,
POPC_C,
POPC_R,
POPC_IMM,
@@ -734,6 +1049,8 @@ public:
ISET_C,
ISET_IMM,
PSETP,
+ PSET,
+ CSETP,
XMAD_IMM,
XMAD_CR,
XMAD_RC,
@@ -757,6 +1074,7 @@ public:
IntegerSet,
IntegerSetPredicate,
PredicateSetPredicate,
+ PredicateSetRegister,
Conversion,
Xmad,
Unknown,
@@ -821,7 +1139,7 @@ public:
private:
struct Detail {
private:
- static constexpr size_t opcode_bitsize = 16;
+ static constexpr std::size_t opcode_bitsize = 16;
/**
* Generates the mask and the expected value after masking from a given bitstring.
@@ -830,8 +1148,8 @@ private:
*/
static auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
- for (size_t i = 0; i < opcode_bitsize; i++) {
- const size_t bit_position = opcode_bitsize - i - 1;
+ for (std::size_t i = 0; i < opcode_bitsize; i++) {
+ const std::size_t bit_position = opcode_bitsize - i - 1;
switch (bitstring[i]) {
case '0':
mask |= 1 << bit_position;
@@ -871,11 +1189,13 @@ private:
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
- INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
+ INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
+ INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
+ INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -906,6 +1226,11 @@ private:
INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
+ INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
+ INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
+ INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
+ INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
+ INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -960,7 +1285,9 @@ private:
INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
+ INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
+ INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
new file mode 100644
index 000000000..a885ee3cf
--- /dev/null
+++ b/src/video_core/engines/shader_header.h
@@ -0,0 +1,103 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+
+namespace Tegra::Shader {
+
+/// Encodings of the 4-bit output_topology field stored in Header::common3.
+/// Only the three values listed here are named; other encodings have no enumerator.
+enum class OutputTopology : u32 {
+ PointList = 1,
+ LineStrip = 6,
+ TriangleStrip = 7,
+};
+
+// Documentation in:
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+struct Header {
+    /// Common word 0: header type/version, shader type and capability flags.
+    union {
+        BitField<0, 5, u32> sph_type;
+        BitField<5, 5, u32> version;
+        BitField<10, 4, u32> shader_type;
+        BitField<14, 1, u32> mrt_enable;
+        BitField<15, 1, u32> kills_pixels;
+        BitField<16, 1, u32> does_global_store;
+        BitField<17, 4, u32> sass_version;
+        BitField<21, 5, u32> reserved;
+        BitField<26, 1, u32> does_load_or_store;
+        BitField<27, 1, u32> does_fp64;
+        BitField<28, 4, u32> stream_out_mask;
+    } common0;
+
+    /// Common word 1: low part of the local memory size and the per-patch attribute count.
+    union {
+        BitField<0, 24, u32> shader_local_memory_low_size;
+        BitField<24, 8, u32> per_patch_attribute_count;
+    } common1;
+
+    /// Common word 2: high part of the local memory size and threads per input primitive.
+    union {
+        BitField<0, 24, u32> shader_local_memory_high_size;
+        BitField<24, 8, u32> threads_per_input_primitive;
+    } common2;
+
+    /// Common word 3: CRS local memory size and the output topology.
+    union {
+        BitField<0, 24, u32> shader_local_memory_crs_size;
+        BitField<24, 4, OutputTopology> output_topology;
+        BitField<28, 4, u32> reserved;
+    } common3;
+
+    /// Common word 4: maximum output vertex count and the store request range.
+    union {
+        BitField<0, 12, u32> max_output_vertices;
+        BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
+        // The four bits after store_req_start are reserved and store_req_end fills the top
+        // byte, so the three fields tile bits 12-31 without aliasing one another.
+        BitField<20, 4, u32> reserved;
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+    } common4;
+
+    /// Shader-type specific tail of the header. Both views occupy the same 60 bytes; which
+    /// one applies presumably depends on common0.shader_type (TODO confirm against callers).
+    union {
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // ImapReserved
+            INSERT_PADDING_BYTES(3);  // OmapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // OmapSystemValuesB
+            INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // OmapColor
+            INSERT_PADDING_BYTES(2);  // OmapSystemValuesC
+            INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(1);  // OmapReserved
+        } vtg;
+
+        struct {
+            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+            INSERT_PADDING_BYTES(2);  // ImapReserved
+            struct {
+                u32 target;
+                union {
+                    BitField<0, 1, u32> sample_mask;
+                    BitField<1, 1, u32> depth;
+                    BitField<2, 30, u32> reserved;
+                };
+            } omap;
+
+            /// Returns true when the bit for `component` of `render_target` is set in
+            /// omap.target, which packs four consecutive bits per render target.
+            /// `render_target * 4 + component` must stay below 32, the width of omap.target.
+            bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
+                const u32 bit = render_target * 4 + component;
+                // Shift an unsigned one: selecting bit 31 would overflow a signed int.
+                return (omap.target & (1U << bit)) != 0;
+            }
+        } ps;
+    };
+};
+
+static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
+
+} // namespace Tegra::Shader
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e6d8e65c6..baa8b63b7 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_memory.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
#include "video_core/engines/maxwell_dma.h"
@@ -27,6 +28,7 @@ GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
+ kepler_memory = std::make_unique<Engines::KeplerMemory>(*memory_manager);
}
GPU::~GPU() = default;
@@ -66,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
case RenderTargetFormat::RGBA8_UINT:
case RenderTargetFormat::RGB10_A2_UNORM:
case RenderTargetFormat::BGRA8_UNORM:
+ case RenderTargetFormat::BGRA8_SRGB:
case RenderTargetFormat::RG16_UNORM:
case RenderTargetFormat::RG16_SNORM:
case RenderTargetFormat::RG16_UINT:
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 2c3dbd97b..5cc1e19ca 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -4,8 +4,9 @@
#pragma once
+#include <array>
#include <memory>
-#include <unordered_map>
+#include <vector>
#include "common/common_types.h"
#include "core/hle/service/nvflinger/buffer_queue.h"
#include "video_core/memory_manager.h"
@@ -26,6 +27,7 @@ enum class RenderTargetFormat : u32 {
RG32_FLOAT = 0xCB,
RG32_UINT = 0xCD,
BGRA8_UNORM = 0xCF,
+ BGRA8_SRGB = 0xD0,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
RGBA8_SRGB = 0xD6,
@@ -40,6 +42,7 @@ enum class RenderTargetFormat : u32 {
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
B5G6R5_UNORM = 0xE8,
+ BGR5A1_UNORM = 0xE9,
RG8_UNORM = 0xEA,
RG8_SNORM = 0xEB,
R16_UNORM = 0xEE,
@@ -67,6 +70,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
/// Returns the number of bytes per pixel of each depth format.
u32 DepthFormatBytesPerPixel(DepthFormat format);
+struct CommandListHeader;
class DebugContext;
/**
@@ -99,6 +103,7 @@ class Fermi2D;
class Maxwell3D;
class MaxwellCompute;
class MaxwellDMA;
+class KeplerMemory;
} // namespace Engines
enum class EngineID {
@@ -115,7 +120,7 @@ public:
~GPU();
/// Processes the command lists described by the specified headers.
- void ProcessCommandList(GPUVAddr address, u32 size);
+ void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
@@ -130,13 +135,10 @@ public:
const Tegra::MemoryManager& MemoryManager() const;
private:
- /// Writes a single register in the engine bound to the specified subchannel
- void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
-
std::unique_ptr<Tegra::MemoryManager> memory_manager;
/// Mapping of command subchannels to their bound engine ids.
- std::unordered_map<u32, EngineID> bound_engines;
+ std::array<EngineID, 8> bound_engines = {};
/// 3D engine
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
@@ -146,6 +148,8 @@ private:
std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
/// DMA engine
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+ /// Inline memory engine
+ std::unique_ptr<Engines::KeplerMemory> kepler_memory;
};
} // namespace Tegra
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 7d836b816..cee0baaf3 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -152,7 +152,7 @@ private:
boost::optional<u32>
delayed_pc; ///< Program counter to execute at after the delay slot is executed.
- static constexpr size_t NumMacroRegisters = 8;
+ static constexpr std::size_t NumMacroRegisters = 8;
/// General purpose macro registers.
std::array<u32, NumMacroRegisters> registers = {};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 9d78e8b6b..cd819d69f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -20,9 +20,6 @@ public:
/// Clear the current framebuffer
virtual void Clear() = 0;
- /// Notify rasterizer that the specified Maxwell register has been changed
- virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
-
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index be17a2b9c..0df3725c2 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -19,6 +19,7 @@ void RendererBase::RefreshBaseSettings() {
UpdateCurrentFramebufferLayout();
renderer_settings.use_framelimiter = Settings::values.use_frame_limit;
+ renderer_settings.set_background_color = true;
}
void RendererBase::UpdateCurrentFramebufferLayout() {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2a357f9d0..2cd0738ff 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -19,6 +19,7 @@ namespace VideoCore {
struct RendererSettings {
std::atomic_bool use_framelimiter{false};
+ std::atomic_bool set_background_color{false};
};
class RendererBase : NonCopyable {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
new file mode 100644
index 000000000..578aca789
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -0,0 +1,93 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <memory>
+
+#include "common/alignment.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+
+namespace OpenGL {
+
+/// Constructs the cache on top of a GL_ARRAY_BUFFER stream buffer created with `size`
+/// (presumably a byte capacity; confirm against OGLStreamBuffer).
+OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
+
+/// Copies `size` bytes of guest memory at GPU address `gpu_addr` into the mapped stream
+/// buffer and returns the buffer offset at which the data starts.
+/// When `cache` is set and the upload is at least 2048 bytes, a previously registered entry
+/// for the same CPU address is reused if it is large enough and has the same alignment;
+/// otherwise the stale entry is unregistered and a new one is registered after the copy.
+GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
+ std::size_t alignment, bool cache) {
+ auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
+ // NOTE(review): cpu_addr is dereferenced below without checking that the translation
+ // produced a value -- confirm callers only pass mapped GPU addresses.
+ const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+
+ // Cache management is a big overhead, so only cache entries with a given size.
+ // TODO: Figure out which size is the best for given games.
+ cache &= size >= 2048;
+
+ if (cache) {
+ auto entry = TryGet(*cpu_addr);
+ if (entry) {
+ // Cache hit: the earlier upload already covers this request, so reuse its offset.
+ if (entry->size >= size && entry->alignment == alignment) {
+ return entry->offset;
+ }
+ // Existing entry is too small or differently aligned; drop it and upload again.
+ Unregister(entry);
+ }
+ }
+
+ // Align first so that the offset captured next is the aligned start of this upload.
+ AlignBuffer(alignment);
+ GLintptr uploaded_offset = buffer_offset;
+
+ Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+
+ // Advance the write cursor (pointer and offset in lockstep) past the copied data.
+ buffer_ptr += size;
+ buffer_offset += size;
+
+ if (cache) {
+ // Record where this range now lives so later requests can reuse it.
+ auto entry = std::make_shared<CachedBufferEntry>();
+ entry->offset = uploaded_offset;
+ entry->size = size;
+ entry->alignment = alignment;
+ entry->addr = *cpu_addr;
+ Register(entry);
+ }
+
+ return uploaded_offset;
+}
+
+/// Copies `size` bytes from host memory at `raw_pointer` into the mapped stream buffer,
+/// after aligning the write cursor to `alignment`, and returns the offset of the copy.
+GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
+                                          std::size_t alignment) {
+    AlignBuffer(alignment);
+
+    // Remember where this upload begins before the cursor moves on.
+    const GLintptr start_offset = buffer_offset;
+    std::memcpy(buffer_ptr, raw_pointer, size);
+
+    // Advance pointer and offset together past the copied bytes.
+    buffer_offset += size;
+    buffer_ptr += size;
+
+    return start_offset;
+}
+
+/// Maps a region of the stream buffer for up to `max_size` bytes (requested with an
+/// alignment of 4) and resets the write cursor to the start of that region. If the stream
+/// buffer reports an invalidation, every cached entry is dropped.
+void OGLBufferCache::Map(std::size_t max_size) {
+    const auto [mapped_ptr, mapped_offset, needs_invalidate] =
+        stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
+
+    buffer_ptr = mapped_ptr;
+    buffer_offset_base = mapped_offset;
+    buffer_offset = buffer_offset_base;
+
+    if (needs_invalidate) {
+        InvalidateAll();
+    }
+}
+/// Unmaps the stream buffer, reporting how far the write cursor advanced since Map().
+void OGLBufferCache::Unmap() {
+    const GLintptr bytes_used = buffer_offset - buffer_offset_base;
+    stream_buffer.Unmap(bytes_used);
+}
+
+/// Returns the handle reported by the underlying stream buffer.
+GLuint OGLBufferCache::GetHandle() const {
+ return stream_buffer.GetHandle();
+}
+
+/// Rounds the current buffer offset up to a multiple of `alignment` and moves the mapped
+/// write pointer forward by the same amount.
+void OGLBufferCache::AlignBuffer(std::size_t alignment) {
+    // The offset is what gets aligned; the mapped pointer simply follows it.
+    const auto unaligned = static_cast<std::size_t>(buffer_offset);
+    const auto aligned = static_cast<GLintptr>(Common::AlignUp(unaligned, alignment));
+    const GLintptr padding = aligned - buffer_offset;
+
+    buffer_ptr += padding;
+    buffer_offset = aligned;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
new file mode 100644
index 000000000..6c18461f4
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -0,0 +1,57 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+
+#include "common/common_types.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_stream_buffer.h"
+
+namespace OpenGL {
+
+/// Describes one cached upload: the address it was read from and where it sits in the
+/// stream buffer. All members are value-initialized so a default-constructed entry never
+/// exposes indeterminate values.
+struct CachedBufferEntry final {
+    /// Address the cached data was read from.
+    VAddr GetAddr() const {
+        return addr;
+    }
+
+    /// Number of bytes covered by this entry.
+    std::size_t GetSizeInBytes() const {
+        return size;
+    }
+
+    VAddr addr{};            ///< Source address of the cached data.
+    std::size_t size{};      ///< Size of the cached data in bytes.
+    GLintptr offset{};       ///< Offset of the data inside the stream buffer.
+    std::size_t alignment{}; ///< Alignment the data was uploaded with.
+};
+
+/// Rasterizer cache of buffer uploads, backed by a single OGLStreamBuffer. Uploads are
+/// written sequentially between a Map() / Unmap() pair.
+class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+public:
+ /// Creates the cache with a stream buffer constructed from `size`.
+ explicit OGLBufferCache(std::size_t size);
+
+ /// Uploads `size` bytes of guest memory at `gpu_addr` and returns the offset of the data
+ /// in the stream buffer. `cache` allows reuse of an earlier upload of the same address.
+ GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+ bool cache = true);
+
+ /// Uploads `size` bytes from host memory and returns the offset of the data in the
+ /// stream buffer. Host uploads are never cached.
+ GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
+
+ /// Maps the stream buffer for up to `max_size` bytes of subsequent uploads.
+ void Map(std::size_t max_size);
+ /// Unmaps the stream buffer, passing it the distance the write cursor advanced.
+ void Unmap();
+
+ /// Returns the handle of the underlying stream buffer.
+ GLuint GetHandle() const;
+
+protected:
+ /// Aligns the write cursor (offset and mapped pointer) up to `alignment`.
+ void AlignBuffer(std::size_t alignment);
+
+private:
+ OGLStreamBuffer stream_buffer;
+
+ // Write cursor into the currently mapped region: pointer and offset advance together.
+ u8* buffer_ptr = nullptr;
+ GLintptr buffer_offset = 0;
+ // Offset at which the current mapping started; used to compute the size passed to Unmap().
+ GLintptr buffer_offset_base = 0;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7ce969f73..70fb54507 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <array>
#include <memory>
#include <string>
#include <string_view>
@@ -33,16 +34,19 @@ using PixelFormat = SurfaceParams::PixelFormat;
using SurfaceType = SurfaceParams::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_FS, "OpenGL", "Fragment Shader Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
-MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
+MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
- : emu_window{window}, screen_info{info}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {
+ : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
// Create sampler objects
- for (size_t i = 0; i < texture_samplers.size(); ++i) {
+ for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
@@ -55,6 +59,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
if (extension == "GL_ARB_direct_state_access") {
has_ARB_direct_state_access = true;
+ } else if (extension == "GL_ARB_multi_bind") {
+ has_ARB_multi_bind = true;
} else if (extension == "GL_ARB_separate_shader_objects") {
has_ARB_separate_shader_objects = true;
} else if (extension == "GL_ARB_vertex_attrib_binding") {
@@ -67,28 +73,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true;
- // Generate VAO and UBO
- sw_vao.Create();
- uniform_buffer.Create();
-
- state.draw.vertex_array = sw_vao.handle;
- state.draw.uniform_buffer = uniform_buffer.handle;
- state.Apply();
-
// Create render framebuffer
framebuffer.Create();
- hw_vao.Create();
-
- state.draw.vertex_buffer = stream_buffer.GetHandle();
-
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
- state.draw.vertex_array = hw_vao.handle;
state.Apply();
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());
-
glEnable(GL_BLEND);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
@@ -98,14 +89,60 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
RasterizerOpenGL::~RasterizerOpenGL() {}
-std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
- GLintptr buffer_offset) {
+void RasterizerOpenGL::SetupVertexArrays() {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
- state.draw.vertex_array = hw_vao.handle;
- state.draw.vertex_buffer = stream_buffer.GetHandle();
+ auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
+ auto& VAO = iter->second;
+
+ if (is_cache_miss) {
+ VAO.Create();
+ state.draw.vertex_array = VAO.handle;
+ state.Apply();
+
+ // The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
+ // around.
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());
+
+ // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
+ // Enables the first 16 vertex attributes always, as we don't know which ones are actually
+ // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
+ // for now to avoid OpenGL errors.
+ // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
+ // assume every shader uses them all.
+ for (unsigned index = 0; index < 16; ++index) {
+ const auto& attrib = regs.vertex_attrib_format[index];
+
+ // Ignore invalid attributes.
+ if (!attrib.IsValid())
+ continue;
+
+ const auto& buffer = regs.vertex_array[attrib.buffer];
+ LOG_TRACE(HW_GPU,
+ "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+ index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
+ attrib.offset.Value(), attrib.IsNormalized());
+
+ ASSERT(buffer.IsEnabled());
+
+ glEnableVertexAttribArray(index);
+ if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
+ attrib.type ==
+ Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
+ glVertexAttribIFormat(index, attrib.ComponentCount(),
+ MaxwellToGL::VertexType(attrib), attrib.offset);
+ } else {
+ glVertexAttribFormat(index, attrib.ComponentCount(),
+ MaxwellToGL::VertexType(attrib),
+ attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
+ }
+ glVertexAttribBinding(index, attrib.buffer);
+ }
+ }
+ state.draw.vertex_array = VAO.handle;
+ state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply();
// Upload all guest vertex arrays sequentially to our buffer
@@ -117,77 +154,35 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
Tegra::GPUVAddr start = vertex_array.StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
- if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
- start += vertex_array.stride * (gpu.state.current_instance / vertex_array.divisor);
- }
-
ASSERT(end > start);
- u64 size = end - start + 1;
-
- GLintptr vertex_buffer_offset;
- std::tie(array_ptr, buffer_offset, vertex_buffer_offset) =
- UploadMemory(array_ptr, buffer_offset, start, size);
+ const u64 size = end - start + 1;
+ const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
- glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
+ glBindVertexBuffer(index, buffer_cache.GetHandle(), vertex_buffer_offset,
vertex_array.stride);
if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
- // Tell OpenGL that this is an instanced vertex buffer to prevent accessing different
- // indexes on each vertex. We do the instance indexing manually by incrementing the
- // start address of the vertex buffer.
- glVertexBindingDivisor(index, 1);
+ // Enable vertex buffer instancing with the specified divisor.
+ glVertexBindingDivisor(index, vertex_array.divisor);
} else {
// Disable the vertex buffer instancing.
glVertexBindingDivisor(index, 0);
}
}
-
- // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
- // Enables the first 16 vertex attributes always, as we don't know which ones are actually used
- // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
- // to avoid OpenGL errors.
- // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
- // assume every shader uses them all.
- for (unsigned index = 0; index < 16; ++index) {
- auto& attrib = regs.vertex_attrib_format[index];
-
- // Ignore invalid attributes.
- if (!attrib.IsValid())
- continue;
-
- auto& buffer = regs.vertex_array[attrib.buffer];
- LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
- index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
- attrib.offset.Value(), attrib.IsNormalized());
-
- ASSERT(buffer.IsEnabled());
-
- glEnableVertexAttribArray(index);
- if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
- attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
- glVertexAttribIFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
- attrib.offset);
- } else {
- glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
- attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
- }
- glVertexAttribBinding(index, attrib.buffer);
- }
-
- return {array_ptr, buffer_offset};
}
-std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+void RasterizerOpenGL::SetupShaders() {
+ MICROPROFILE_SCOPE(OpenGL_Shader);
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
- for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- auto& shader_config = gpu.regs.shader_config[index];
+ for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ const auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
@@ -195,21 +190,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
continue;
}
- std::tie(buffer_ptr, buffer_offset) =
- AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment));
-
- const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
+ const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
- std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
+ const GLintptr offset = buffer_cache.UploadHostMemory(
+ &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
// Bind the buffer
- glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset,
- sizeof(ubo));
-
- buffer_ptr += sizeof(ubo);
- buffer_offset += sizeof(ubo);
+ glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
Shader shader{shader_cache.GetStageProgram(program)};
@@ -230,9 +219,8 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
}
// Configure the const buffers for this shader stage.
- std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) =
- SetupConstBuffers(buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage),
- shader, current_constbuffer_bindpoint);
+ current_constbuffer_bindpoint = SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage),
+ shader, current_constbuffer_bindpoint);
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
@@ -245,15 +233,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
}
}
- shader_program_manager->UseTrivialGeometryShader();
+ state.Apply();
- return {buffer_ptr, buffer_offset};
+ shader_program_manager->UseTrivialGeometryShader();
}
-size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
+std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- size_t size = 0;
+ std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
if (!regs.vertex_array[index].IsEnabled())
continue;
@@ -309,60 +297,80 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
cached_pages.add({pages_interval, delta});
}
-std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
- bool using_depth_fb,
- bool preserve_contents) {
+void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
+ bool preserve_contents,
+ boost::optional<std::size_t> single_color_target) {
+ MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
- LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
- using_color_fb = false;
+ Surface depth_surface;
+ if (using_depth_fb) {
+ depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
}
- const bool has_stencil = regs.stencil_enable;
- const bool write_color_fb =
- state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
- state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
+ // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
+ // used to enable multiple render targets. However, it is left unset on all games that I have
+ // tested.
+ ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
- const bool write_depth_fb =
- (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
- (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask));
-
- Surface color_surface;
- Surface depth_surface;
- MathUtil::Rectangle<u32> surfaces_rect;
- std::tie(color_surface, depth_surface, surfaces_rect) =
- res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents);
+ // Bind the framebuffer surfaces
+ state.draw.draw_framebuffer = framebuffer.handle;
+ state.Apply();
- const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
- const MathUtil::Rectangle<u32> draw_rect{
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
- surfaces_rect.left, surfaces_rect.right)), // Left
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
- surfaces_rect.bottom, surfaces_rect.top)), // Top
- static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
- surfaces_rect.left, surfaces_rect.right)), // Right
- static_cast<u32>(
- std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
- surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+ if (using_color_fb) {
+ if (single_color_target) {
+ // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
+ Surface color_surface =
+ res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
+ color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
+ glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+ } else {
+ // Multiple color attachments are enabled
+ std::array<GLenum, Maxwell::NumRenderTargets> buffers;
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
+ buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+ glFramebufferTexture2D(
+ GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+ GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
+ 0);
+ }
+ glDrawBuffers(regs.rt_control.count, buffers.data());
+ }
+ } else {
+ // No color attachments are enabled - zero out all of them
+ for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
+ GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
+ 0, 0);
+ }
+ glDrawBuffer(GL_NONE);
+ }
- // Bind the framebuffer surfaces
- BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
+ if (depth_surface) {
+ if (regs.stencil_enable) {
+ // Attach both depth and stencil
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+ depth_surface->Texture().handle, 0);
+ } else {
+ // Attach depth
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+ depth_surface->Texture().handle, 0);
+ // Clear stencil attachment
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+ }
+ } else {
+ // Clear both depth and stencil attachment
+ glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+ 0);
+ }
- SyncViewport(surfaces_rect);
+ SyncViewport();
- // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
- // scissor test to prevent drawing outside of the framebuffer region
- state.scissor.enabled = true;
- state.scissor.x = draw_rect.left;
- state.scissor.y = draw_rect.bottom;
- state.scissor.width = draw_rect.GetWidth();
- state.scissor.height = draw_rect.GetHeight();
state.Apply();
-
- // Only return the surface to be marked as dirty if writing to it is enabled.
- return std::make_pair(write_color_fb ? color_surface : nullptr,
- write_depth_fb ? depth_surface : nullptr);
}
void RasterizerOpenGL::Clear() {
@@ -370,32 +378,24 @@ void RasterizerOpenGL::Clear() {
SCOPE_EXIT({ prev_state.Apply(); });
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- bool use_color_fb = false;
- bool use_depth_fb = false;
+ bool use_color{};
+ bool use_depth{};
+ bool use_stencil{};
OpenGLState clear_state;
- clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
+ clear_state.draw.draw_framebuffer = framebuffer.handle;
clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
- GLbitfield clear_mask{};
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
- if (regs.clear_buffers.RT == 0) {
- // We only support clearing the first color attachment for now
- clear_mask |= GL_COLOR_BUFFER_BIT;
- use_color_fb = true;
- } else {
- // TODO(subv): Add support for the other color attachments
- LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT);
- }
+ use_color = true;
}
if (regs.clear_buffers.Z) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
- use_depth_fb = true;
- clear_mask |= GL_DEPTH_BUFFER_BIT;
+ use_depth = true;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
// ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -404,59 +404,33 @@ void RasterizerOpenGL::Clear() {
}
if (regs.clear_buffers.S) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
- use_depth_fb = true;
- clear_mask |= GL_STENCIL_BUFFER_BIT;
+ use_stencil = true;
clear_state.stencil.test_enabled = true;
}
- if (!use_color_fb && !use_depth_fb) {
+ if (!use_color && !use_depth && !use_stencil) {
// No color surface nor depth/stencil surface are enabled
return;
}
- if (clear_mask == 0) {
- // No clear mask is enabled
- return;
- }
-
ScopeAcquireGLContext acquire_context{emu_window};
- auto [dirty_color_surface, dirty_depth_surface] =
- ConfigureFramebuffers(use_color_fb, use_depth_fb, false);
+ ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+ regs.clear_buffers.RT.Value());
clear_state.Apply();
- glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
- regs.clear_color[3]);
- glClearDepth(regs.clear_depth);
- glClearStencil(regs.clear_stencil);
-
- glClear(clear_mask);
-}
-
-std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
- size_t alignment) {
- // Align the offset, not the mapped pointer
- GLintptr offset_aligned =
- static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
- return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned};
-}
-
-std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr,
- GLintptr buffer_offset,
- Tegra::GPUVAddr gpu_addr,
- size_t size, size_t alignment) {
- std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
- GLintptr uploaded_offset = buffer_offset;
-
- auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
- Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
-
- buffer_ptr += size;
- buffer_offset += size;
+ if (use_color) {
+ glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+ }
- return {buffer_ptr, buffer_offset, uploaded_offset};
+ if (use_depth && use_stencil) {
+ glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
+ } else if (use_depth) {
+ glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
+ } else if (use_stencil) {
+ glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
+ }
}
void RasterizerOpenGL::DrawArrays() {
@@ -464,12 +438,12 @@ void RasterizerOpenGL::DrawArrays() {
return;
MICROPROFILE_SCOPE(OpenGL_Drawing);
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
ScopeAcquireGLContext acquire_context{emu_window};
- auto [dirty_color_surface, dirty_depth_surface] =
- ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true);
+ ConfigureFramebuffers();
SyncDepthTestState();
SyncStencilTestState();
@@ -482,43 +456,46 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
- const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
+ const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
+ static_cast<u64>(regs.index_array.FormatSizeInBytes())};
- state.draw.vertex_buffer = stream_buffer.GetHandle();
+ state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply();
- size_t buffer_size = CalculateVertexArraysSize();
+ std::size_t buffer_size = CalculateVertexArraysSize();
if (is_indexed) {
- buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size;
+ buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
}
// Uniform space for the 5 shader stages
buffer_size =
- Common::AlignUp<size_t>(buffer_size, 4) +
+ Common::AlignUp<std::size_t>(buffer_size, 4) +
(sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
// Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
- u8* buffer_ptr;
- GLintptr buffer_offset;
- std::tie(buffer_ptr, buffer_offset, std::ignore) =
- stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
- u8* buffer_ptr_base = buffer_ptr;
+ buffer_cache.Map(buffer_size);
- std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
+ SetupVertexArrays();
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
- std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory(
- buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size);
+ MICROPROFILE_SCOPE(OpenGL_Index);
+
+ // Adjust the index buffer offset so it points to the first desired index.
+ auto index_start = regs.index_array.StartAddress();
+ index_start += static_cast<size_t>(regs.index_array.first) *
+ static_cast<size_t>(regs.index_array.FormatSizeInBytes());
+
+ index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
}
- std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset);
+ SetupShaders();
- stream_buffer.Unmap(buffer_ptr - buffer_ptr_base);
+ buffer_cache.Unmap();
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -527,14 +504,26 @@ void RasterizerOpenGL::DrawArrays() {
if (is_indexed) {
const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
- // Adjust the index buffer offset so it points to the first desired index.
- index_buffer_offset += regs.index_array.first * regs.index_array.FormatSizeInBytes();
-
- glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
- MaxwellToGL::IndexFormat(regs.index_array.format),
- reinterpret_cast<const void*>(index_buffer_offset), base_vertex);
+ if (gpu.state.current_instance > 0) {
+ glDrawElementsInstancedBaseVertexBaseInstance(
+ primitive_mode, regs.index_array.count,
+ MaxwellToGL::IndexFormat(regs.index_array.format),
+ reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex,
+ gpu.state.current_instance);
+ } else {
+ glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
+ MaxwellToGL::IndexFormat(regs.index_array.format),
+ reinterpret_cast<const void*>(index_buffer_offset),
+ base_vertex);
+ }
} else {
- glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
+ if (gpu.state.current_instance > 0) {
+ glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first,
+ regs.vertex_buffer.count, 1,
+ gpu.state.current_instance);
+ } else {
+ glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
+ }
}
// Disable scissor test
@@ -549,24 +538,18 @@ void RasterizerOpenGL::DrawArrays() {
state.Apply();
}
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
+void RasterizerOpenGL::FlushAll() {}
-void RasterizerOpenGL::FlushAll() {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-}
-
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-}
+void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size);
+ buffer_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
- MICROPROFILE_SCOPE(OpenGL_CacheManagement);
InvalidateRegion(addr, size);
}
@@ -614,7 +597,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
void RasterizerOpenGL::SamplerInfo::Create() {
sampler.Create();
mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear;
- wrap_u = wrap_v = Tegra::Texture::WrapMode::Wrap;
+ wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap;
// default is GL_LINEAR_MIPMAP_LINEAR
glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -622,7 +605,7 @@ void RasterizerOpenGL::SamplerInfo::Create() {
}
void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
- GLuint s = sampler.handle;
+ const GLuint s = sampler.handle;
if (mag_filter != config.mag_filter) {
mag_filter = config.mag_filter;
@@ -641,8 +624,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
wrap_v = config.wrap_v;
glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
}
+ if (wrap_p != config.wrap_p) {
+ wrap_p = config.wrap_p;
+ glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
+ }
- if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border) {
+ if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border ||
+ wrap_p == Tegra::Texture::WrapMode::Border) {
const GLvec4 new_border_color = {{config.border_color_r, config.border_color_g,
config.border_color_b, config.border_color_a}};
if (border_color != new_border_color) {
@@ -652,26 +640,35 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
-std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_ptr,
- GLintptr buffer_offset,
- Maxwell::ShaderStage stage,
- Shader& shader,
- u32 current_bindpoint) {
+u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
+ u32 current_bindpoint) {
+ MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
- const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
+ const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
const auto& entries = shader->GetShaderEntries().const_buffer_entries;
+ constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers;
+ std::array<GLuint, max_binds> bind_buffers;
+ std::array<GLintptr, max_binds> bind_offsets;
+ std::array<GLsizeiptr, max_binds> bind_sizes;
+
+ ASSERT_MSG(entries.size() <= max_binds, "Exceeded expected number of binding points.");
+
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& used_buffer = entries[bindpoint];
const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
if (!buffer.enabled) {
+ // With disabled buffers set values as zero to unbind them
+ bind_buffers[bindpoint] = 0;
+ bind_offsets[bindpoint] = 0;
+ bind_sizes[bindpoint] = 0;
continue;
}
- size_t size = 0;
+ std::size_t size = 0;
if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing
@@ -692,26 +689,28 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_pt
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
- GLintptr const_buffer_offset;
- std::tie(buffer_ptr, buffer_offset, const_buffer_offset) =
- UploadMemory(buffer_ptr, buffer_offset, buffer.address, size,
- static_cast<size_t>(uniform_buffer_alignment));
-
- glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
- stream_buffer.GetHandle(), const_buffer_offset, size);
+ GLintptr const_buffer_offset = buffer_cache.UploadMemory(
+ buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
// Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(),
- shader->GetProgramResourceIndex(used_buffer.GetName()),
+ shader->GetProgramResourceIndex(used_buffer),
current_bindpoint + bindpoint);
+
+ // Prepare values for multibind
+ bind_buffers[bindpoint] = buffer_cache.GetHandle();
+ bind_offsets[bindpoint] = const_buffer_offset;
+ bind_sizes[bindpoint] = size;
}
- state.Apply();
+ glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
+ bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
- return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())};
+ return current_bindpoint + static_cast<u32>(entries.size());
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) {
+ MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().texture_samplers;
@@ -721,24 +720,25 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- u32 current_bindpoint = current_unit + bindpoint;
+ const u32 current_bindpoint = current_unit + bindpoint;
// Bind the uniform to the sampler.
- glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry.GetName()),
+ glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry),
current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
if (!texture.enabled) {
- state.texture_units[current_bindpoint].texture_2d = 0;
+ state.texture_units[current_bindpoint].texture = 0;
continue;
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
Surface surface = res_cache.GetTextureSurface(texture);
if (surface != nullptr) {
- state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
+ state.texture_units[current_bindpoint].texture = surface->Texture().handle;
+ state.texture_units[current_bindpoint].target = surface->Target();
state.texture_units[current_bindpoint].swizzle.r =
MaxwellToGL::SwizzleSource(texture.tic.x_source);
state.texture_units[current_bindpoint].swizzle.g =
@@ -749,47 +749,19 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
MaxwellToGL::SwizzleSource(texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
- state.texture_units[current_bindpoint].texture_2d = 0;
+ state.texture_units[current_bindpoint].texture = 0;
}
}
- state.Apply();
-
return current_unit + static_cast<u32>(entries.size());
}
-void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
- const Surface& depth_surface, bool has_stencil) {
- state.draw.draw_framebuffer = framebuffer.handle;
- state.Apply();
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
- if (depth_surface != nullptr) {
- if (has_stencil) {
- // attach both depth and stencil
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- } else {
- // attach depth
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- depth_surface->Texture().handle, 0);
- // clear stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- }
- } else {
- // clear both depth and stencil attachment
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
- }
-}
-
-void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
+void RasterizerOpenGL::SyncViewport() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
- state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
- state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
+ state.viewport.x = viewport_rect.left;
+ state.viewport.y = viewport_rect.bottom;
state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 30045ebff..bf9560bdc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -6,19 +6,23 @@
#include <array>
#include <cstddef>
+#include <map>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>
#include <boost/icl/interval_map.hpp>
+#include <boost/optional.hpp>
#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -42,7 +46,6 @@ public:
void DrawArrays() override;
void Clear() override;
- void NotifyMaxwellRegisterChanged(u32 method) override;
void FlushAll() override;
void FlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
@@ -70,7 +73,7 @@ public:
};
/// Maximum supported size that a constbuffer can have in bytes.
- static constexpr size_t MaxConstbufferSize = 0x10000;
+ static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
@@ -90,17 +93,20 @@ private:
Tegra::Texture::TextureFilter min_filter;
Tegra::Texture::WrapMode wrap_u;
Tegra::Texture::WrapMode wrap_v;
+ Tegra::Texture::WrapMode wrap_p;
GLvec4 border_color;
};
- /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
- /// surfaces if writing was enabled.
- std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
- bool preserve_contents);
-
- /// Binds the framebuffer color and depth surface
- void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
- bool has_stencil);
+ /**
+ * Configures the color and depth framebuffer states.
+ * @param use_color_fb If true, configure color framebuffers.
+ * @param using_depth_fb If true, configure the depth/stencil framebuffer.
+ * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
+ * @param single_color_target Specifies if a single color buffer target should be used.
+ */
+ void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
+ bool preserve_contents = true,
+ boost::optional<std::size_t> single_color_target = {});
/*
* Configures the current constbuffers to use for the draw command.
@@ -109,9 +115,8 @@ private:
* @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage.
*/
- std::tuple<u8*, GLintptr, u32> SetupConstBuffers(
- u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- Shader& shader, u32 current_bindpoint);
+ u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
+ u32 current_bindpoint);
/*
* Configures the current textures to use for the draw command.
@@ -124,7 +129,7 @@ private:
u32 current_unit);
/// Syncs the viewport to match the guest state
- void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
+ void SyncViewport();
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled();
@@ -154,6 +159,7 @@ private:
void SyncLogicOpState();
bool has_ARB_direct_state_access = false;
+ bool has_ARB_multi_bind = false;
bool has_ARB_separate_shader_objects = false;
bool has_ARB_vertex_attrib_binding = false;
@@ -167,28 +173,23 @@ private:
ScreenInfo& screen_info;
std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
- OGLVertexArray sw_vao;
- OGLVertexArray hw_vao;
+ std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
+ Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,
+ OGLVertexArray>
+ vertex_array_cache;
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
- static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
- OGLStreamBuffer stream_buffer;
- OGLBuffer uniform_buffer;
+ static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+ OGLBufferCache buffer_cache;
OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment;
- size_t CalculateVertexArraysSize() const;
-
- std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);
-
- std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
+ std::size_t CalculateVertexArraysSize() const;
- std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment);
+ void SetupVertexArrays();
- std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset,
- Tegra::GPUVAddr gpu_addr, size_t size,
- size_t alignment = 4);
+ void SetupShaders();
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 1965ab7d5..86682d7cb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -7,6 +7,7 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h"
@@ -52,14 +53,30 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
params.unaligned_height = config.tic.Height();
+ params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+
+ switch (params.target) {
+ case SurfaceTarget::Texture1D:
+ case SurfaceTarget::Texture2D:
+ params.depth = 1;
+ break;
+ case SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture2DArray:
+ params.depth = config.tic.Depth();
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
+ UNREACHABLE();
+ params.depth = 1;
+ break;
+ }
+
params.size_in_bytes = params.SizeInBytes();
- params.cache_width = Common::AlignUp(params.width, 16);
- params.cache_height = Common::AlignUp(params.height, 16);
return params;
}
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
- const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
+ const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
SurfaceParams params{};
params.addr = TryGetCpuAddr(config.Address());
params.is_tiled = true;
@@ -70,9 +87,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.width = config.width;
params.height = config.height;
params.unaligned_height = config.height;
+ params.target = SurfaceTarget::Texture2D;
+ params.depth = 1;
params.size_in_bytes = params.SizeInBytes();
- params.cache_width = Common::AlignUp(params.width, 16);
- params.cache_height = Common::AlignUp(params.height, 16);
return params;
}
@@ -86,13 +103,12 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
- params.size_in_bytes = params.SizeInBytes();
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
+ params.target = SurfaceTarget::Texture2D;
+ params.depth = 1;
params.size_in_bytes = params.SizeInBytes();
- params.cache_width = Common::AlignUp(params.width, 16);
- params.cache_height = Common::AlignUp(params.height, 16);
return params;
}
@@ -100,7 +116,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
- {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
+ {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
false}, // A2B10G10R10U
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
@@ -151,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -166,8 +183,28 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
ComponentType::Float, false}, // Z32FS8
}};
+/// Translates a SurfaceParams::SurfaceTarget into the corresponding OpenGL texture target.
+/// A value outside the enumerators listed below is logged as critical, hits UNREACHABLE(), and
+/// yields a value-initialized GLenum.
+static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
+ using Target = SurfaceParams::SurfaceTarget;
+
+ switch (target) {
+ case Target::Texture1D:
+ return GL_TEXTURE_1D;
+ case Target::Texture2D:
+ return GL_TEXTURE_2D;
+ case Target::Texture3D:
+ return GL_TEXTURE_3D;
+ case Target::Texture1DArray:
+ return GL_TEXTURE_1D_ARRAY;
+ case Target::Texture2DArray:
+ return GL_TEXTURE_2D_ARRAY;
+ case Target::TextureCubemap:
+ return GL_TEXTURE_CUBE_MAP;
+ }
+
+ // Only reached when `target` holds a value that is not a named enumerator.
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
+ UNREACHABLE();
+ return GLenum{};
+}
+
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
- ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+ ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
ASSERT(component_type == format.component_type);
@@ -177,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
static bool IsPixelFormatASTC(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
+ case PixelFormat::ASTC_2D_8X8:
return true;
default:
return false;
@@ -187,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
switch (format) {
case PixelFormat::ASTC_2D_4X4:
return {4, 4};
+ case PixelFormat::ASTC_2D_8X8:
+ return {8, 8};
default:
LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
UNREACHABLE();
@@ -220,7 +260,8 @@ static bool IsFormatBCn(PixelFormat format) {
}
template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, VAddr addr) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size,
+ VAddr addr) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
@@ -230,18 +271,18 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
addr, tile_size, bytes_per_pixel, stride, height, block_height);
- const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
- gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
+ const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
+ memcpy(gl_buffer, data.data(), size_to_copy);
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
// check the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
- Memory::GetPointer(addr), gl_buffer.data(), morton_to_gl);
+ Memory::GetPointer(addr), gl_buffer, morton_to_gl);
}
}
-static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
// clang-format off
@@ -290,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
@@ -298,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
// clang-format on
};
-static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
// clang-format off
@@ -349,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::R32UI>,
+ nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
@@ -357,33 +400,6 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
// clang-format on
};
-// Allocate an uninitialized texture of appropriate size and format for the surface
-static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width,
- u32 height) {
- OpenGLState cur_state = OpenGLState::GetCurState();
-
- // Keep track of previous texture bindings
- GLuint old_tex = cur_state.texture_units[0].texture_2d;
- cur_state.texture_units[0].texture_2d = texture;
- cur_state.Apply();
- glActiveTexture(GL_TEXTURE0);
-
- if (!format_tuple.compressed) {
- // Only pre-create the texture for non-compressed textures.
- glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
- format_tuple.format, format_tuple.type, nullptr);
- }
-
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-
- // Restore previous texture bindings
- cur_state.texture_units[0].texture_2d = old_tex;
- cur_state.Apply();
-}
-
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
GLuint read_fb_handle, GLuint draw_fb_handle) {
@@ -438,12 +454,53 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
return true;
}
-CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
+// Creates the GL texture object for a surface and, for non-compressed formats, allocates its
+// immutable storage according to the surface target (1D / 2D / 3D / 2D array). The texture is
+// temporarily bound to texture unit 0; the previous unit-0 binding is restored on scope exit.
+CachedSurface::CachedSurface(const SurfaceParams& params)
+ : params(params), gl_target(SurfaceTargetToGL(params.target)) {
texture.Create();
const auto& rect{params.GetRect()};
- AllocateSurfaceTexture(texture.handle,
- GetFormatTuple(params.pixel_format, params.component_type),
+
+ // Keep track of previous texture bindings.
+ // This must be a copy: a reference would alias cur_state.texture_units[0], which is
+ // overwritten below, turning the restore in SCOPE_EXIT into a self-assignment.
+ OpenGLState cur_state = OpenGLState::GetCurState();
+ const auto old_tex = cur_state.texture_units[0];
+ SCOPE_EXIT({
+ cur_state.texture_units[0] = old_tex;
+ cur_state.Apply();
+ });
+
+ cur_state.texture_units[0].texture = texture.handle;
+ cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
+ cur_state.Apply();
+ glActiveTexture(GL_TEXTURE0);
+
+ const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
+ if (!format_tuple.compressed) {
+ // Only pre-create the texture for non-compressed textures.
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+ rect.GetWidth());
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
rect.GetWidth(), rect.GetHeight());
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+ rect.GetWidth(), rect.GetHeight(), params.depth);
+ break;
+ default:
+ // Unsupported targets are reported, then fall back to a 2D allocation.
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
+ rect.GetHeight());
+ }
+ }
+
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
@@ -461,10 +518,10 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
S8Z24 input_pixel{};
Z24S8 output_pixel{};
- const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
- for (size_t y = 0; y < height; ++y) {
- for (size_t x = 0; x < width; ++x) {
- const size_t offset{bpp * (y * width + x)};
+ constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
+ for (std::size_t y = 0; y < height; ++y) {
+ for (std::size_t x = 0; x < width; ++x) {
+ const std::size_t offset{bpp * (y * width + x)};
std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
output_pixel.s8.Assign(input_pixel.s8);
output_pixel.z24.Assign(input_pixel.z24);
@@ -474,10 +531,10 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
}
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
- const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
- for (size_t y = 0; y < height; ++y) {
- for (size_t x = 0; x < width; ++x) {
- const size_t offset{bpp * (y * width + x)};
+ constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
+ for (std::size_t y = 0; y < height; ++y) {
+ for (std::size_t x = 0; x < width; ++x) {
+ const std::size_t offset{bpp * (y * width + x)};
const u8 temp{data[offset]};
data[offset] = data[offset + 1];
data[offset + 1] = temp;
@@ -493,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
u32 width, u32 height) {
switch (pixel_format) {
- case PixelFormat::ASTC_2D_4X4: {
+ case PixelFormat::ASTC_2D_4X4:
+ case PixelFormat::ASTC_2D_8X8: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
@@ -514,23 +572,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
}
}
-/**
- * Helper function to perform software conversion (as needed) when flushing a buffer to Switch
- * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
- * typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& /*data*/, PixelFormat pixel_format,
- u32 /*width*/, u32 /*height*/) {
- switch (pixel_format) {
- case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::S8Z24:
- LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}",
- static_cast<u32>(pixel_format));
- UNREACHABLE();
- break;
- }
-}
-
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
void CachedSurface::LoadGLBuffer() {
ASSERT(params.type != SurfaceType::Fill);
@@ -545,13 +586,25 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
if (params.is_tiled) {
- gl_buffer.resize(copy_size);
+ // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
+ // this for 3D textures, etc.
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ // Pass impl. to the fallback code below
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ }
- morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer, params.addr);
+ gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size);
+ morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
+ params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
+ params.addr);
} else {
- const u8* const texture_src_data_end = texture_src_data + copy_size;
-
+ const u8* const texture_src_data_end{texture_src_data +
+ (static_cast<std::size_t>(params.depth) * copy_size)};
gl_buffer.assign(texture_src_data, texture_src_data_end);
}
@@ -560,23 +613,7 @@ void CachedSurface::LoadGLBuffer() {
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer() {
- u8* const dst_buffer = Memory::GetPointer(params.addr);
-
- ASSERT(dst_buffer);
- ASSERT(gl_buffer.size() ==
- params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
-
- MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
-
- ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
- params.height);
-
- if (!params.is_tiled) {
- std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
- } else {
- gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, gl_buffer, params.addr);
- }
+ // Flushing is currently unimplemented: any call trips this assertion.
+ ASSERT_MSG(false, "Unimplemented");
}
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
@@ -586,22 +623,30 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
MICROPROFILE_SCOPE(OpenGL_TextureUL);
- ASSERT(gl_buffer.size() ==
- params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
+ ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height *
+ GetGLBytesPerPixel(params.pixel_format) * params.depth);
const auto& rect{params.GetRect()};
// Load data from memory to the surface
- GLint x0 = static_cast<GLint>(rect.left);
- GLint y0 = static_cast<GLint>(rect.bottom);
- size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format);
+ const GLint x0 = static_cast<GLint>(rect.left);
+ const GLint y0 = static_cast<GLint>(rect.bottom);
+ const std::size_t buffer_offset =
+ static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
+ static_cast<std::size_t>(x0)) *
+ GetGLBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- GLuint target_tex = texture.handle;
+ const GLuint target_tex = texture.handle;
OpenGLState cur_state = OpenGLState::GetCurState();
- GLuint old_tex = cur_state.texture_units[0].texture_2d;
- cur_state.texture_units[0].texture_2d = target_tex;
+ const auto& old_tex = cur_state.texture_units[0];
+ SCOPE_EXIT({
+ cur_state.texture_units[0] = old_tex;
+ cur_state.Apply();
+ });
+ cur_state.texture_units[0].texture = target_tex;
+ cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
@@ -610,136 +655,102 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
glActiveTexture(GL_TEXTURE0);
if (tuple.compressed) {
- glCompressedTexImage2D(
- GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
- static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
- &gl_buffer[buffer_offset]);
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glCompressedTexImage2D(
+ SurfaceTargetToGL(params.target), 0, tuple.internal_format,
+ static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
+ static_cast<GLsizei>(params.size_in_bytes), &gl_buffer[buffer_offset]);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glCompressedTexImage3D(
+ SurfaceTargetToGL(params.target), 0, tuple.internal_format,
+ static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
+ static_cast<GLsizei>(params.depth), 0, static_cast<GLsizei>(params.size_in_bytes),
+ &gl_buffer[buffer_offset]);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ glCompressedTexImage2D(
+ GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
+ static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
+ &gl_buffer[buffer_offset]);
+ }
} else {
- glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
- static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
- &gl_buffer[buffer_offset]);
- }
-
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-
- cur_state.texture_units[0].texture_2d = old_tex;
- cur_state.Apply();
-}
-
-MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
-void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
- if (params.type == SurfaceType::Fill)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_TextureDL);
-
- gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
-
- OpenGLState state = OpenGLState::GetCurState();
- OpenGLState prev_state = state;
- SCOPE_EXIT({ prev_state.Apply(); });
-
- const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-
- // Ensure no bad interactions with GL_PACK_ALIGNMENT
- ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
- glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
-
- const auto& rect{params.GetRect()};
- size_t buffer_offset =
- (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format);
-
- state.UnbindTexture(texture.handle);
- state.draw.read_framebuffer = read_fb_handle;
- state.Apply();
- if (params.type == SurfaceType::ColorTexture) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- texture.handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
- } else if (params.type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- texture.handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
- } else {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- texture.handle, 0);
+ switch (params.target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ glTexSubImage1D(SurfaceTargetToGL(params.target), 0, x0,
+ static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glTexSubImage2D(SurfaceTargetToGL(params.target), 0, x0, y0,
+ static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glTexSubImage3D(SurfaceTargetToGL(params.target), 0, x0, y0, 0,
+ static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
+ tuple.type, &gl_buffer[buffer_offset]);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+ &gl_buffer[buffer_offset]);
+ }
}
- glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
- static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
- tuple.format, tuple.type, &gl_buffer[buffer_offset]);
- glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
+// Creates the GL objects owned by the cache: the read/draw framebuffers and the copy PBO.
RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
read_framebuffer.Create();
draw_framebuffer.Create();
+ copy_pbo.Create();
}
+// Builds SurfaceParams from the texture configuration and forwards them to GetSurface().
Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
return GetSurface(SurfaceParams::CreateForTexture(config));
}
-SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
- bool using_depth_fb,
- bool preserve_contents) {
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+// Returns the surface backing the current depth (zeta) buffer. A default-constructed Surface is
+// returned when the zeta address is zero or zeta_enable is not set; otherwise SurfaceParams are
+// built from the zeta registers and passed to GetSurface() together with preserve_contents.
+Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
+ const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
+ if (!regs.zeta.Address() || !regs.zeta_enable) {
+ return {};
+ }
- // TODO(bunnei): This is hard corded to use just the first render buffer
- LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
+ SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
+ regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)};
- // get color and depth surfaces
- SurfaceParams color_params{};
- SurfaceParams depth_params{};
+ return GetSurface(depth_params, preserve_contents);
+}
- if (using_color_fb) {
- color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
- }
+// Returns the surface backing render target `index`. `index` is asserted to be below
+// NumRenderTargets. A default-constructed Surface is returned when `index` is not below
+// rt_control.count, or when that render target has a zero address or the NONE format.
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
+ const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
- if (using_depth_fb) {
- depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
- regs.zeta.Address(), regs.zeta.format);
- }
+ ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
- MathUtil::Rectangle<u32> color_rect{};
- Surface color_surface;
- if (using_color_fb) {
- color_surface = GetSurface(color_params, preserve_contents);
- if (color_surface) {
- color_rect = color_surface->GetSurfaceParams().GetRect();
- }
+ if (index >= regs.rt_control.count) {
+ return {};
}
- MathUtil::Rectangle<u32> depth_rect{};
- Surface depth_surface;
- if (using_depth_fb) {
- depth_surface = GetSurface(depth_params, preserve_contents);
- if (depth_surface) {
- depth_rect = depth_surface->GetSurfaceParams().GetRect();
- }
+ if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+ return {};
}
- MathUtil::Rectangle<u32> fb_rect{};
- if (color_surface && depth_surface) {
- fb_rect = color_rect;
- // Color and Depth surfaces must have the same dimensions and offsets
- if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
- color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
- color_surface = GetSurface(color_params);
- depth_surface = GetSurface(depth_params);
- fb_rect = color_surface->GetSurfaceParams().GetRect();
- }
- } else if (color_surface) {
- fb_rect = color_rect;
- } else if (depth_surface) {
- fb_rect = depth_rect;
- }
+ const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
- return std::make_tuple(color_surface, depth_surface, fb_rect);
+ return GetSurface(color_params, preserve_contents);
}
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
@@ -748,7 +759,6 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
}
+// Delegates flushing to the surface's own FlushGLBuffer().
void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
- surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
surface->FlushGLBuffer();
}
@@ -806,27 +816,26 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
// Get a new surface with the new parameters, and blit the previous surface to it
Surface new_surface{GetUncachedSurface(new_params)};
- // If format is unchanged, we can do a faster blit without reinterpreting pixel data
- if (params.pixel_format == new_params.pixel_format) {
+ if (params.pixel_format == new_params.pixel_format ||
+ !Settings::values.use_accurate_framebuffers) {
+ // If the format is the same, just do a framebuffer blit. This is significantly faster than
+ // using PBOs. This is also likely less accurate, as textures will be converted rather than
+ // reinterpreted.
+
BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
- new_surface->GetSurfaceParams().GetRect(), params.type,
- read_framebuffer.handle, draw_framebuffer.handle);
- return new_surface;
- }
+ params.GetRect(), params.type, read_framebuffer.handle,
+ draw_framebuffer.handle);
+ } else {
+ // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
+ // where pixels are reinterpreted as a new format (without conversion). This code path uses
+ // OpenGL PBOs and is quite slow.
- // When using accurate framebuffers, always copy old data to new surface, regardless of format
- if (Settings::values.use_accurate_framebuffers) {
auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
- size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
+ std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
- // Use a Pixel Buffer Object to download the previous texture and then upload it to the new
- // one using the new format.
- OGLBuffer pbo;
- pbo.Create();
-
- glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
if (source_format.compressed) {
glGetCompressedTextureImage(surface->Texture().handle, 0,
@@ -845,10 +854,10 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
// of the data in this case. Games like Super Mario Odyssey seem to hit this case
// when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
// but it doesn't clear it beforehand, the texture is already full of zeros.
- LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during "
- "reinterpretation but the texture is tiled.");
+ LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
+ "reinterpretation but the texture is tiled.");
}
- size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
+ std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
std::vector<u8> data(remaining_size);
Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size());
glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size,
@@ -859,21 +868,38 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
const auto& dest_rect{new_params.GetRect()};
- glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle);
+ glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle);
if (dest_format.compressed) {
- glCompressedTexSubImage2D(
- GL_TEXTURE_2D, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()),
- static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
- static_cast<GLsizei>(new_params.SizeInBytes()), nullptr);
+ LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
+ UNREACHABLE();
} else {
- glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
- static_cast<GLsizei>(dest_rect.GetWidth()),
- static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
- dest_format.type, nullptr);
+ switch (new_params.target) {
+ case SurfaceParams::SurfaceTarget::Texture1D:
+ glTextureSubImage1D(new_surface->Texture().handle, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()), dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture2D:
+ glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()),
+ static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceParams::SurfaceTarget::Texture2DArray:
+ glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0,
+ static_cast<GLsizei>(dest_rect.GetWidth()),
+ static_cast<GLsizei>(dest_rect.GetHeight()),
+ static_cast<GLsizei>(new_params.depth), dest_format.format,
+ dest_format.type, nullptr);
+ break;
+ default:
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+ static_cast<u32>(params.target));
+ UNREACHABLE();
+ }
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
- pbo.Release();
}
return new_surface;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index aad75f200..d7a4bc37f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -70,19 +70,20 @@ struct SurfaceParams {
RG8S = 42,
RG32UI = 43,
R32UI = 44,
+ ASTC_2D_8X8 = 45,
MaxColorFormat,
// Depth formats
- Z32F = 45,
- Z16 = 46,
+ Z32F = 46,
+ Z16 = 47,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 47,
- S8Z24 = 48,
- Z32FS8 = 49,
+ Z24S8 = 48,
+ S8Z24 = 49,
+ Z32FS8 = 50,
MaxDepthStencilFormat,
@@ -90,7 +91,7 @@ struct SurfaceParams {
Invalid = 255,
};
- static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
+ static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
enum class ComponentType {
Invalid = 0,
@@ -109,6 +110,33 @@ struct SurfaceParams {
Invalid = 4,
};
+ /// Kind of texture a surface is created as. SurfaceTargetToGL() in gl_rasterizer_cache.cpp
+ /// maps each enumerator to the matching GL_TEXTURE_* target.
+ enum class SurfaceTarget {
+ Texture1D,
+ Texture2D,
+ Texture3D,
+ Texture1DArray,
+ Texture2DArray,
+ TextureCubemap,
+ };
+
+ /// Converts a Tegra texture type into the SurfaceTarget used by the cache.
+ /// Texture2DNoMipmap is treated like Texture2D. Texture types not listed here are logged as
+ /// critical, hit UNREACHABLE(), and fall back to SurfaceTarget::Texture2D.
+ static SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) {
+ using TextureType = Tegra::Texture::TextureType;
+
+ switch (texture_type) {
+ case TextureType::Texture1D:
+ return SurfaceTarget::Texture1D;
+ case TextureType::Texture2D:
+ case TextureType::Texture2DNoMipmap:
+ return SurfaceTarget::Texture2D;
+ case TextureType::Texture1DArray:
+ return SurfaceTarget::Texture1DArray;
+ case TextureType::Texture2DArray:
+ return SurfaceTarget::Texture2DArray;
+ default:
+ break;
+ }
+
+ // Unhandled texture type: report it and fall back to a plain 2D target.
+ LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
+ UNREACHABLE();
+ return SurfaceTarget::Texture2D;
+ }
+
/**
* Gets the compression factor for the specified PixelFormat. This applies to just the
* "compressed width" and "compressed height", not the overall compression factor of a
@@ -165,6 +193,7 @@ struct SurfaceParams {
1, // RG8S
1, // RG32UI
1, // R32UI
+ 4, // ASTC_2D_8X8
1, // Z32F
1, // Z16
1, // Z24S8
@@ -172,8 +201,8 @@ struct SurfaceParams {
1, // Z32FS8
}};
- ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
- return compression_factor_table[static_cast<size_t>(format)];
+ ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
+ return compression_factor_table[static_cast<std::size_t>(format)];
}
static constexpr u32 GetFormatBpp(PixelFormat format) {
@@ -226,6 +255,7 @@ struct SurfaceParams {
16, // RG8S
64, // RG32UI
32, // R32UI
+ 16, // ASTC_2D_8X8
32, // Z32F
16, // Z16
32, // Z24S8
@@ -233,8 +263,8 @@ struct SurfaceParams {
64, // Z32FS8
}};
- ASSERT(static_cast<size_t>(format) < bpp_table.size());
- return bpp_table[static_cast<size_t>(format)];
+ ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
+ return bpp_table[static_cast<std::size_t>(format)];
}
u32 GetFormatBpp() const {
@@ -270,6 +300,7 @@ struct SurfaceParams {
return PixelFormat::ABGR8S;
case Tegra::RenderTargetFormat::RGBA8_UINT:
return PixelFormat::ABGR8UI;
+ case Tegra::RenderTargetFormat::BGRA8_SRGB:
case Tegra::RenderTargetFormat::BGRA8_UNORM:
return PixelFormat::BGRA8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
@@ -288,6 +319,8 @@ struct SurfaceParams {
return PixelFormat::R11FG11FB10F;
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
return PixelFormat::B5G6R5U;
+ case Tegra::RenderTargetFormat::BGR5A1_UNORM:
+ return PixelFormat::A1B5G5R5U;
case Tegra::RenderTargetFormat::RGBA32_UINT:
return PixelFormat::RGBA32UI;
case Tegra::RenderTargetFormat::R8_UNORM:
@@ -494,6 +527,8 @@ struct SurfaceParams {
return PixelFormat::BC6H_SF16;
case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
return PixelFormat::ASTC_2D_4X4;
+ case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
+ return PixelFormat::ASTC_2D_8X8;
case Tegra::Texture::TextureFormat::R16_G16:
switch (component_type) {
case Tegra::Texture::ComponentType::FLOAT:
@@ -542,11 +577,13 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGBA8_UNORM:
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::BGRA8_UNORM:
+ case Tegra::RenderTargetFormat::BGRA8_SRGB:
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
case Tegra::RenderTargetFormat::R8_UNORM:
case Tegra::RenderTargetFormat::RG16_UNORM:
case Tegra::RenderTargetFormat::R16_UNORM:
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+ case Tegra::RenderTargetFormat::BGR5A1_UNORM:
case Tegra::RenderTargetFormat::RG8_UNORM:
case Tegra::RenderTargetFormat::RGBA16_UNORM:
return ComponentType::UNorm;
@@ -607,16 +644,18 @@ struct SurfaceParams {
}
static SurfaceType GetFormatType(PixelFormat pixel_format) {
- if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) {
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
return SurfaceType::ColorTexture;
}
- if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) {
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
return SurfaceType::Depth;
}
- if (static_cast<size_t>(pixel_format) <
- static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) {
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
return SurfaceType::DepthStencil;
}
@@ -630,20 +669,19 @@ struct SurfaceParams {
MathUtil::Rectangle<u32> GetRect() const;
/// Returns the size of this surface in bytes, adjusted for compression
- size_t SizeInBytes() const {
+ std::size_t SizeInBytes() const {
const u32 compression_factor{GetCompressionFactor(pixel_format)};
ASSERT(width % compression_factor == 0);
ASSERT(height % compression_factor == 0);
return (width / compression_factor) * (height / compression_factor) *
- GetFormatBpp(pixel_format) / CHAR_BIT;
+ GetFormatBpp(pixel_format) * depth / CHAR_BIT;
}
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
/// Creates SurfaceParams from a framebuffer configuration
- static SurfaceParams CreateForFramebuffer(
- const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
+ static SurfaceParams CreateForFramebuffer(std::size_t index);
/// Creates SurfaceParams for a depth buffer configuration
static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -652,8 +690,8 @@ struct SurfaceParams {
/// Checks if surfaces are compatible for caching
bool IsCompatibleSurface(const SurfaceParams& other) const {
- return std::tie(pixel_format, type, cache_width, cache_height) ==
- std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height);
+ return std::tie(pixel_format, type, width, height) ==
+ std::tie(other.pixel_format, other.type, other.width, other.height);
}
VAddr addr;
@@ -664,12 +702,10 @@ struct SurfaceParams {
SurfaceType type;
u32 width;
u32 height;
+ u32 depth;
u32 unaligned_height;
- size_t size_in_bytes;
-
- // Parameters used for caching only
- u32 cache_width;
- u32 cache_height;
+ std::size_t size_in_bytes;
+ SurfaceTarget target;
};
}; // namespace OpenGL
@@ -685,7 +721,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
namespace std {
template <>
struct hash<SurfaceReserveKey> {
- size_t operator()(const SurfaceReserveKey& k) const {
+ std::size_t operator()(const SurfaceReserveKey& k) const {
return k.Hash();
}
};
@@ -701,7 +737,7 @@ public:
return params.addr;
}
- size_t GetSizeInBytes() const {
+ std::size_t GetSizeInBytes() const {
return params.size_in_bytes;
}
@@ -709,6 +745,10 @@ public:
return texture;
}
+ GLenum Target() const {
+ return gl_target;
+ }
+
static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
if (format == SurfaceParams::PixelFormat::Invalid)
return 0;
@@ -724,14 +764,14 @@ public:
void LoadGLBuffer();
void FlushGLBuffer();
- // Upload/Download data in gl_buffer in/to this surface's texture
+ // Upload data in gl_buffer to this surface's texture
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
- void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
private:
OGLTexture texture;
std::vector<u8> gl_buffer;
SurfaceParams params;
+ GLenum gl_target;
};
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -741,9 +781,11 @@ public:
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
- /// Get the color and depth surfaces based on the framebuffer configuration
- SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
- bool preserve_contents);
+ /// Get the depth surface based on the framebuffer configuration
+ Surface GetDepthBufferSurface(bool preserve_contents);
+
+ /// Get the color surface based on the framebuffer configuration and the specified render target
+ Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
/// Flushes the surface to Switch memory
void FlushSurface(const Surface& surface);
@@ -774,6 +816,10 @@ private:
OGLFramebuffer read_framebuffer;
OGLFramebuffer draw_framebuffer;
+
+ /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
+ /// using the new format.
+ OGLBuffer copy_pbo;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index ac9adfd83..894fe6eae 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -13,8 +13,8 @@ namespace OpenGL {
/// Gets the address for the specified shader stage program
static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
- auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
shader_config.offset);
}
@@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) {
/// Helper function to set shader uniform block bindings for a single shader stage
static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
- Maxwell::ShaderStage binding, size_t expected_size) {
+ Maxwell::ShaderStage binding, std::size_t expected_size) {
const GLuint ub_index = glGetUniformBlockIndex(shader, name);
if (ub_index == GL_INVALID_INDEX) {
return;
@@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
GLint ub_size = 0;
glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
- ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size,
+ ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
"Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
}
@@ -85,23 +85,23 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
SetShaderUniformBlockBindings(program.handle);
}
-GLuint CachedShader::GetProgramResourceIndex(const std::string& name) {
- auto search{resource_cache.find(name)};
+GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
+ const auto search{resource_cache.find(buffer.GetHash())};
if (search == resource_cache.end()) {
const GLuint index{
- glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, name.c_str())};
- resource_cache[name] = index;
+ glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
+ resource_cache[buffer.GetHash()] = index;
return index;
}
return search->second;
}
-GLint CachedShader::GetUniformLocation(const std::string& name) {
- auto search{uniform_cache.find(name)};
+GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
+ const auto search{uniform_cache.find(sampler.GetHash())};
if (search == uniform_cache.end()) {
- const GLint index{glGetUniformLocation(program.handle, name.c_str())};
- uniform_cache[name] = index;
+ const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
+ uniform_cache[sampler.GetHash()] = index;
return index;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 759987604..9bafe43a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -4,8 +4,8 @@
#pragma once
+#include <map>
#include <memory>
-#include <unordered_map>
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
@@ -28,7 +28,7 @@ public:
}
/// Gets the size of the shader in guest memory, required for cache management
- size_t GetSizeInBytes() const {
+ std::size_t GetSizeInBytes() const {
return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64);
}
@@ -43,10 +43,10 @@ public:
}
/// Gets the GL program resource location for the specified resource, caching as needed
- GLuint GetProgramResourceIndex(const std::string& name);
+ GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
/// Gets the GL uniform location for the specified resource, caching as needed
- GLint GetUniformLocation(const std::string& name);
+ GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
private:
VAddr addr;
@@ -55,8 +55,8 @@ private:
GLShader::ShaderEntries entries;
OGLProgram program;
- std::unordered_map<std::string, GLuint> resource_cache;
- std::unordered_map<std::string, GLint> uniform_cache;
+ std::map<u32, GLuint> resource_cache;
+ std::map<u32, GLint> uniform_cache;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 391c92d47..b3e95187e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -12,6 +12,7 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_header.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -26,7 +27,7 @@ using Tegra::Shader::Sampler;
using Tegra::Shader::SubOp;
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
-constexpr u32 PROGRAM_HEADER_SIZE = 0x50;
+constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header);
class DecompileFail : public std::runtime_error {
public:
@@ -113,7 +114,7 @@ private:
/// Scans a range of code for labels and determines the exit method.
ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
- auto [iter, inserted] =
+ const auto [iter, inserted] =
exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
ExitMethod& exit_method = iter->second;
if (!inserted)
@@ -131,22 +132,22 @@ private:
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
return exit_method = ExitMethod::AlwaysEnd;
} else {
- ExitMethod not_met = Scan(offset + 1, end, labels);
+ const ExitMethod not_met = Scan(offset + 1, end, labels);
return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
}
}
case OpCode::Id::BRA: {
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
- ExitMethod no_jmp = Scan(offset + 1, end, labels);
- ExitMethod jmp = Scan(target, end, labels);
+ const ExitMethod no_jmp = Scan(offset + 1, end, labels);
+ const ExitMethod jmp = Scan(target, end, labels);
return exit_method = ParallelExit(no_jmp, jmp);
}
case OpCode::Id::SSY: {
// The SSY instruction uses a similar encoding as the BRA instruction.
ASSERT_MSG(instr.bra.constant_buffer == 0,
"Constant buffer SSY is not supported");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
labels.insert(target);
// Continue scanning for an exit method.
break;
@@ -189,7 +190,7 @@ public:
private:
void AppendIndentation() {
- shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+ shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
}
std::string shader_source;
@@ -208,7 +209,7 @@ public:
UnsignedInteger,
};
- GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
+ GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {}
/// Gets the GLSL type string for a register
static std::string GetTypeString() {
@@ -226,15 +227,23 @@ public:
}
/// Returns the index of the register
- size_t GetIndex() const {
+ std::size_t GetIndex() const {
return index;
}
private:
- const size_t index;
+ const std::size_t index;
const std::string& suffix;
};
+enum class InternalFlag : u64 {
+ ZeroFlag = 0,
+ CarryFlag = 1,
+ OverflowFlag = 2,
+ NaNFlag = 3,
+ Amount
+};
+
/**
* Used to manage shader registers that are emulated with GLSL. This class keeps track of the state
* of all registers (e.g. whether they are currently being used as Floats or Integers), and
@@ -247,6 +256,7 @@ public:
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
: shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
BuildRegisterList();
+ BuildInputList();
}
/**
@@ -327,13 +337,19 @@ public:
void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
const std::string& value, u64 dest_num_components,
u64 value_num_components, bool is_saturated = false,
- u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
+ u64 dest_elem = 0, Register::Size size = Register::Size::Word,
+ bool sets_cc = false) {
ASSERT_MSG(!is_saturated, "Unimplemented");
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
dest_num_components, value_num_components, dest_elem);
+
+ if (sets_cc) {
+ const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
+ SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
+ }
}
/**
@@ -343,12 +359,33 @@ public:
* @param elem The element to use for the operation.
* @param attribute The input attribute to use as the source value.
*/
- void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) {
- std::string dest = GetRegisterAsFloat(reg);
- std::string src = GetInputAttribute(attribute) + GetSwizzle(elem);
+ void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
+ const Tegra::Shader::IpaMode& input_mode) {
+ const std::string dest = GetRegisterAsFloat(reg);
+ const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
shader.AddLine(dest + " = " + src + ';');
}
+ std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
+ switch (cc) {
+ case Tegra::Shader::ControlCode::NEU:
+ return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')';
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ return "false";
+ }
+ }
+
+ std::string GetInternalFlag(const InternalFlag ii) const {
+ const u32 code = static_cast<u32>(ii);
+ return "internalFlag_" + std::to_string(code) + suffix;
+ }
+
+ void SetInternalFlag(const InternalFlag ii, const std::string& value) const {
+ shader.AddLine(GetInternalFlag(ii) + " = " + value + ';');
+ }
+
/**
* Writes code that does a output attribute assignment to register operation. Output attributes
* are stored as floats, so this may require conversion.
@@ -357,8 +394,8 @@ public:
* @param reg The register to use as the source value.
*/
void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
- std::string dest = GetOutputAttribute(attribute);
- std::string src = GetRegisterAsFloat(reg);
+ const std::string dest = GetOutputAttribute(attribute);
+ const std::string src = GetRegisterAsFloat(reg);
if (!dest.empty()) {
// Can happen with unknown/unimplemented output attributes, in which case we ignore the
@@ -391,9 +428,9 @@ public:
GLSLRegister::Type type) {
declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
- std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
- std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
- final_offset + " % 4]";
+ const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
+ const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
+ final_offset + " % 4]";
if (type == GLSLRegister::Type::Float) {
return value;
@@ -412,12 +449,19 @@ public:
}
declarations.AddNewLine();
- for (const auto& index : declr_input_attribute) {
+ for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
+ const InternalFlag code = static_cast<InternalFlag>(ii);
+ declarations.AddLine("bool " + GetInternalFlag(code) + " = false;");
+ }
+ declarations.AddNewLine();
+
+ for (const auto element : declr_input_attribute) {
// TODO(bunnei): Use proper number of elements for these
- declarations.AddLine("layout(location = " +
- std::to_string(static_cast<u32>(index) -
- static_cast<u32>(Attribute::Index::Attribute_0)) +
- ") in vec4 " + GetInputAttribute(index) + ';');
+ u32 idx =
+ static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0);
+ declarations.AddLine("layout(location = " + std::to_string(idx) + ")" +
+ GetInputFlags(element.first) + "in vec4 " +
+ GetInputAttribute(element.first, element.second) + ';');
}
declarations.AddNewLine();
@@ -440,13 +484,12 @@ public:
}
declarations.AddNewLine();
- // Append the sampler2D array for the used textures.
- size_t num_samplers = GetSamplers().size();
- if (num_samplers > 0) {
- declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
- std::to_string(num_samplers) + "];");
- declarations.AddNewLine();
+ const auto& samplers = GetSamplers();
+ for (const auto& sampler : samplers) {
+ declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
+ ';');
}
+ declarations.AddNewLine();
}
/// Returns a list of constant buffer declarations
@@ -458,27 +501,29 @@ public:
}
/// Returns a list of samplers used in the shader
- std::vector<SamplerEntry> GetSamplers() const {
+ const std::vector<SamplerEntry>& GetSamplers() const {
return used_samplers;
}
/// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
/// necessary.
- std::string AccessSampler(const Sampler& sampler) {
- size_t offset = static_cast<size_t>(sampler.index.Value());
+ std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
+ bool is_array) {
+ const std::size_t offset = static_cast<std::size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
- auto itr =
+ const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
+ ASSERT(itr->GetType() == type && itr->IsArray() == is_array);
return itr->GetName();
}
// Otherwise create a new mapping for this sampler
- size_t next_index = used_samplers.size();
- SamplerEntry entry{stage, offset, next_index};
+ const std::size_t next_index = used_samplers.size();
+ const SamplerEntry entry{stage, offset, next_index, type, is_array};
used_samplers.emplace_back(entry);
return entry.GetName();
}
@@ -527,16 +572,29 @@ private:
void BuildRegisterList() {
regs.reserve(Register::NumRegisters);
- for (size_t index = 0; index < Register::NumRegisters; ++index) {
+ for (std::size_t index = 0; index < Register::NumRegisters; ++index) {
regs.emplace_back(index, suffix);
}
}
+ void BuildInputList() {
+ const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) -
+ static_cast<u32>(Attribute::Index::Attribute_0) + 1;
+ declr_input_attribute.reserve(size);
+ }
+
/// Generates code representing an input attribute register.
- std::string GetInputAttribute(Attribute::Index attribute) {
+ std::string GetInputAttribute(Attribute::Index attribute,
+ const Tegra::Shader::IpaMode& input_mode) {
switch (attribute) {
case Attribute::Index::Position:
- return "position";
+ if (stage != Maxwell3D::Regs::ShaderStage::Fragment) {
+ return "position";
+ } else {
+ return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)";
+ }
+ case Attribute::Index::PointCoord:
+ return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)";
case Attribute::Index::TessCoordInstanceIDVertexID:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
@@ -552,7 +610,14 @@ private:
static_cast<u32>(Attribute::Index::Attribute_0)};
if (attribute >= Attribute::Index::Attribute_0 &&
attribute <= Attribute::Index::Attribute_31) {
- declr_input_attribute.insert(attribute);
+ if (declr_input_attribute.count(attribute) == 0) {
+ declr_input_attribute[attribute] = input_mode;
+ } else {
+ if (declr_input_attribute[attribute] != input_mode) {
+ LOG_CRITICAL(HW_GPU, "Same Input multiple input modes");
+ UNREACHABLE();
+ }
+ }
return "input_attribute_" + std::to_string(index);
}
@@ -563,6 +628,49 @@ private:
return "vec4(0, 0, 0, 0)";
}
+ std::string GetInputFlags(const Attribute::Index attribute) {
+ const Tegra::Shader::IpaSampleMode sample_mode =
+ declr_input_attribute[attribute].sampling_mode;
+ const Tegra::Shader::IpaInterpMode interp_mode =
+ declr_input_attribute[attribute].interpolation_mode;
+ std::string out;
+ switch (interp_mode) {
+ case Tegra::Shader::IpaInterpMode::Flat: {
+ out += "flat ";
+ break;
+ }
+ case Tegra::Shader::IpaInterpMode::Linear: {
+ out += "noperspective ";
+ break;
+ }
+ case Tegra::Shader::IpaInterpMode::Perspective: {
+ // Default, Smooth
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled Ipa InterpMode: {}", static_cast<u32>(interp_mode));
+ UNREACHABLE();
+ }
+ }
+ switch (sample_mode) {
+ case Tegra::Shader::IpaSampleMode::Centroid: {
+ // Note not implemented, it can be implemented with the "centroid " keyword in glsl;
+ LOG_CRITICAL(HW_GPU, "Ipa Sampler Mode: centroid, not implemented");
+ UNREACHABLE();
+ break;
+ }
+ case Tegra::Shader::IpaSampleMode::Default: {
+ // Default, n/a
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled Ipa SampleMode: {}", static_cast<u32>(sample_mode));
+ UNREACHABLE();
+ }
+ }
+ return out;
+ }
+
/// Generates code representing an output attribute register.
std::string GetOutputAttribute(Attribute::Index attribute) {
switch (attribute) {
@@ -593,7 +701,7 @@ private:
ShaderWriter& shader;
ShaderWriter& declarations;
std::vector<GLSLRegister> regs;
- std::set<Attribute::Index> declr_input_attribute;
+ std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute;
std::set<Attribute::Index> declr_output_attribute;
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
std::vector<SamplerEntry> used_samplers;
@@ -607,7 +715,7 @@ public:
u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
stage(stage), suffix(suffix) {
-
+ std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
Generate(suffix);
}
@@ -621,26 +729,9 @@ public:
}
private:
- // Shader program header for a Fragment Shader.
- struct FragmentHeader {
- INSERT_PADDING_WORDS(5);
- INSERT_PADDING_WORDS(13);
- u32 enabled_color_outputs;
- union {
- BitField<0, 1, u32> writes_samplemask;
- BitField<1, 1, u32> writes_depth;
- };
-
- bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
- u32 bit = render_target * 4 + component;
- return enabled_color_outputs & (1 << bit);
- }
- };
- static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong");
-
/// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const {
- auto iter = subroutines.find(Subroutine{begin, end, suffix});
+ const auto iter = subroutines.find(Subroutine{begin, end, suffix});
ASSERT(iter != subroutines.end());
return *iter;
}
@@ -656,8 +747,8 @@ private:
}
/// Generates code representing a texture sampler.
- std::string GetSampler(const Sampler& sampler) {
- return regs.AccessSampler(sampler);
+ std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) {
+ return regs.AccessSampler(sampler, type, is_array);
}
/**
@@ -685,7 +776,7 @@ private:
// Can't assign to the constant predicate.
ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
- std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
+ const std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
shader.AddLine(variable + " = " + value + ';');
declr_predicates.insert(std::move(variable));
}
@@ -795,7 +886,7 @@ private:
*/
bool IsSchedInstruction(u32 offset) const {
// sched instructions appear once every 4 instructions.
- static constexpr size_t SchedPeriod = 4;
+ static constexpr std::size_t SchedPeriod = 4;
u32 absolute_offset = offset - main_offset;
return (absolute_offset % SchedPeriod) == 0;
@@ -863,7 +954,7 @@ private:
std::string result;
result += '(';
- for (size_t i = 0; i < shift_amounts.size(); ++i) {
+ for (std::size_t i = 0; i < shift_amounts.size(); ++i) {
if (i)
result += '|';
result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] +
@@ -887,7 +978,7 @@ private:
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
- size_t written_components = 0;
+ std::size_t written_components = 0;
for (u32 component = 0; component < 4; ++component) {
if (!instr.texs.IsComponentEnabled(component)) {
continue;
@@ -941,10 +1032,8 @@ private:
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
void EmitFragmentOutputsWrite() {
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
- FragmentHeader header;
- std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE);
- ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented");
+ ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented");
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
@@ -953,18 +1042,22 @@ private:
++render_target) {
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
- if (header.IsColorComponentOutputEnabled(render_target, component)) {
- shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component,
+ if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
+ shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
regs.GetRegisterAsFloat(current_reg)));
++current_reg;
}
}
}
- if (header.writes_depth) {
+ if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';');
+
+ shader.AddLine(
+ "gl_FragDepth = " +
+ regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
+ ';');
}
}
@@ -1038,6 +1131,15 @@ private:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
// FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
+ ASSERT_MSG(instr.fmul.tab5cb8_2 == 0, "FMUL tab5cb8_2({}) is not implemented",
+ instr.fmul.tab5cb8_2.Value());
+ ASSERT_MSG(instr.fmul.tab5c68_1 == 0, "FMUL tab5cb8_1({}) is not implemented",
+ instr.fmul.tab5c68_1.Value());
+ ASSERT_MSG(instr.fmul.tab5c68_0 == 1, "FMUL tab5cb8_0({}) is not implemented",
+ instr.fmul.tab5c68_0
+ .Value()); // SMO typical sends 1 here which seems to be the default
+ ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
+
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
instr.alu.saturate_d);
@@ -1357,7 +1459,7 @@ private:
if (instr.alu_integer.negate_b)
op_b = "-(" + op_b + ')';
- std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
+ const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
regs.SetRegisterToInteger(instr.gpr0, true, 0,
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
@@ -1375,7 +1477,7 @@ private:
case OpCode::Id::SEL_C:
case OpCode::Id::SEL_R:
case OpCode::Id::SEL_IMM: {
- std::string condition =
+ const std::string condition =
GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
regs.SetRegisterToInteger(instr.gpr0, true, 0,
'(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
@@ -1397,8 +1499,9 @@ private:
case OpCode::Id::LOP3_C:
case OpCode::Id::LOP3_R:
case OpCode::Id::LOP3_IMM: {
- std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
+ const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
std::string lut;
+
if (opcode->GetId() == OpCode::Id::LOP3_R) {
lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
} else {
@@ -1413,15 +1516,80 @@ private:
case OpCode::Id::IMNMX_IMM: {
ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None,
"Unimplemented");
- std::string condition =
+ const std::string condition =
GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
- std::string parameters = op_a + ',' + op_b;
+ const std::string parameters = op_a + ',' + op_b;
regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
'(' + condition + ") ? min(" + parameters + ") : max(" +
parameters + ')',
1, 1);
break;
}
+ case OpCode::Id::LEA_R2:
+ case OpCode::Id::LEA_R1:
+ case OpCode::Id::LEA_IMM:
+ case OpCode::Id::LEA_RZ:
+ case OpCode::Id::LEA_HI: {
+ std::string op_c;
+
+ switch (opcode->GetId()) {
+ case OpCode::Id::LEA_R2: {
+ op_a = regs.GetRegisterAsInteger(instr.gpr20);
+ op_b = regs.GetRegisterAsInteger(instr.gpr39);
+ op_c = std::to_string(instr.lea.r2.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_R1: {
+ const bool neg = instr.lea.r1.neg != 0;
+ op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_a = "-(" + op_a + ')';
+ op_b = regs.GetRegisterAsInteger(instr.gpr20);
+ op_c = std::to_string(instr.lea.r1.entry_a);
+ break;
+ }
+
+ case OpCode::Id::LEA_IMM: {
+ const bool neg = instr.lea.imm.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ break;
+ }
+
+ case OpCode::Id::LEA_RZ: {
+ const bool neg = instr.lea.rz.neg != 0;
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ if (neg)
+ op_b = "-(" + op_b + ')';
+ op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
+ GLSLRegister::Type::Integer);
+ op_c = std::to_string(instr.lea.rz.entry_a);
+
+ break;
+ }
+
+ case OpCode::Id::LEA_HI:
+ default: {
+ op_b = regs.GetRegisterAsInteger(instr.gpr8);
+ op_a = std::to_string(instr.lea.imm.entry_a);
+ op_c = std::to_string(instr.lea.imm.entry_b);
+ LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) {
+ LOG_ERROR(HW_GPU, "Unhandled LEA Predicate");
+ UNREACHABLE();
+ }
+ const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
opcode->GetName());
@@ -1432,10 +1600,16 @@ private:
break;
}
case OpCode::Type::Ffma: {
- std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
std::string op_b = instr.ffma.negate_b ? "-" : "";
std::string op_c = instr.ffma.negate_c ? "-" : "";
+ ASSERT_MSG(instr.ffma.cc == 0, "FFMA cc not implemented");
+ ASSERT_MSG(instr.ffma.tab5980_0 == 1, "FFMA tab5980_0({}) not implemented",
+ instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
+ ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
+ instr.ffma.tab5980_1.Value());
+
switch (opcode->GetId()) {
case OpCode::Id::FFMA_CR: {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
@@ -1486,7 +1660,8 @@ private:
}
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
- 1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
+ 1, instr.alu.saturate_d, 0, instr.conversion.dest_size,
+ instr.generates_cc.Value() != 0);
break;
}
case OpCode::Id::I2F_R:
@@ -1616,9 +1791,34 @@ private:
case OpCode::Type::Memory: {
switch (opcode->GetId()) {
case OpCode::Id::LD_A: {
- ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
- regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
- instr.attribute.fmt20.index);
+ // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+ ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+ "Indirect attribute loads are not supported");
+ ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+ "Unaligned attribute loads are not supported");
+
+ Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
+ Tegra::Shader::IpaSampleMode::Default};
+
+ u64 next_element = instr.attribute.fmt20.element;
+ u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+ const auto LoadNextElement = [&](u32 reg_offset) {
+ regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element,
+ static_cast<Attribute::Index>(next_index),
+ input_mode);
+
+ // Load the next attribute element into the following register. If the element
+ // to load goes beyond the vec4 size, load the first element of the next
+ // attribute.
+ next_element = (next_element + 1) % 4;
+ next_index = next_index + (next_element == 0 ? 1 : 0);
+ };
+
+ const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+ for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+ LoadNextElement(reg_offset);
+ }
break;
}
case OpCode::Id::LD_C: {
@@ -1632,7 +1832,7 @@ private:
shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
" / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
- std::string op_a =
+ const std::string op_a =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
GLSLRegister::Type::Float);
@@ -1642,7 +1842,7 @@ private:
break;
case Tegra::Shader::UniformType::Double: {
- std::string op_b =
+ const std::string op_b =
regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
"index", GLSLRegister::Type::Float);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
@@ -1660,25 +1860,111 @@ private:
break;
}
case OpCode::Id::ST_A: {
- ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
- regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
- instr.attribute.fmt20.element, instr.gpr0);
+ ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
+ "Indirect attribute loads are not supported");
+ ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0,
+ "Unaligned attribute loads are not supported");
+
+ u64 next_element = instr.attribute.fmt20.element;
+ u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+
+ const auto StoreNextElement = [&](u32 reg_offset) {
+ regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index),
+ next_element,
+ instr.gpr0.Value() + reg_offset);
+
+ // Store the following register into the next attribute element. If the element
+ // to store goes beyond the vec4 size, wrap to the first element of the next
+ // attribute.
+ next_element = (next_element + 1) % 4;
+ next_index = next_index + (next_element == 0 ? 1 : 0);
+ };
+
+ const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+ for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+ StoreNextElement(reg_offset);
+ }
+
break;
}
case OpCode::Id::TEX: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented");
+ Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
+ std::string coord;
+
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture1D: {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ coord = "float coords = " + x + ';';
+ break;
+ }
+ case Tegra::Shader::TextureType::Texture2D: {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+
+ // Fallback to interpreting as a 2D texture for now
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ }
+ // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
+ // or lod.
+ const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
+
+ const std::string sampler = GetSampler(instr.sampler, texture_type, false);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
+
shader.AddLine("{");
++shader.scope;
shader.AddLine(coord);
- const std::string texture = "texture(" + sampler + ", coords)";
+ std::string texture;
- size_t dest_elem{};
- for (size_t elem = 0; elem < 4; ++elem) {
+ switch (instr.tex.process_mode) {
+ case Tegra::Shader::TextureProcessMode::None: {
+ texture = "texture(" + sampler + ", coords)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LZ: {
+ texture = "textureLod(" + sampler + ", coords, 0.0)";
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LB:
+ case Tegra::Shader::TextureProcessMode::LBA: {
+ // TODO: Figure out whether the A suffix changes the equation at all.
+ texture = "texture(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ case Tegra::Shader::TextureProcessMode::LL:
+ case Tegra::Shader::TextureProcessMode::LLA: {
+ // TODO: Figure out whether the A suffix changes the equation at all.
+ texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
+ break;
+ }
+ default: {
+ texture = "texture(" + sampler + ", coords)";
+ LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}",
+ static_cast<u32>(instr.tex.process_mode.Value()));
+ UNREACHABLE();
+ }
+ }
+ std::size_t dest_elem{};
+ for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
@@ -1691,20 +1977,77 @@ private:
break;
}
case OpCode::Id::TEXS: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ std::string coord;
+ Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
+ bool is_array{instr.texs.IsArrayTexture()};
+
+ ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (is_array) {
+ const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
+ } else {
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+ // Fallback to interpreting as a 2D texture for now
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ is_array = false;
+ }
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
const std::string texture = "texture(" + sampler + ", coords)";
WriteTexsInstruction(instr, coord, texture);
break;
}
case OpCode::Id::TLDS: {
- const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsInteger(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "ivec2 coords = ivec2(" + op_a + ", " + op_b + ");";
+ ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D);
+ ASSERT(instr.tlds.IsArrayTexture() == false);
+ std::string coord;
+
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
+ "MZ is not implemented");
+
+ switch (instr.tlds.GetTextureType()) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (instr.tlds.IsArrayTexture()) {
+ LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture");
+ UNREACHABLE();
+ } else {
+ const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
+ const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
+ coord = "ivec2 coords = ivec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(instr.tlds.GetTextureType()));
+ UNREACHABLE();
+ }
+ const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(),
+ instr.tlds.IsArrayTexture());
const std::string texture = "texelFetch(" + sampler + ", coords, 0)";
WriteTexsInstruction(instr, coord, texture);
break;
@@ -1712,12 +2055,23 @@ private:
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
ASSERT(instr.tld4.array == 0);
- std::string coord{};
+ std::string coord;
+
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+ "NDV is not implemented");
+ ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
+ "PTP is not implemented");
switch (instr.tld4.texture_type) {
case Tegra::Shader::TextureType::Texture2D: {
- std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
coord = "vec2 coords = vec2(" + x + ", " + y + ");";
break;
}
@@ -1727,7 +2081,8 @@ private:
UNREACHABLE();
}
- const std::string sampler = GetSampler(instr.sampler);
+ const std::string sampler =
+ GetSampler(instr.sampler, instr.tld4.texture_type, false);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
shader.AddLine("{");
@@ -1736,8 +2091,8 @@ private:
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4.component) + ')';
- size_t dest_elem{};
- for (size_t elem = 0; elem < 4; ++elem) {
+ std::size_t dest_elem{};
+ for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
@@ -1750,16 +2105,100 @@ private:
break;
}
case OpCode::Id::TLD4S: {
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+ ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC),
+ "DC is not implemented");
+
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
- const std::string sampler = GetSampler(instr.sampler);
+ const std::string sampler =
+ GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4s.component) + ')';
WriteTexsInstruction(instr, coord, texture);
break;
}
+ case OpCode::Id::TXQ: {
+ ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+
+ // TODO: the texture refactor changed the way samplers work: GetSampler now needs
+ // the texture type, but not all texture instructions specify the type of texture
+ // their sampler uses. Texture2D is assumed here; this must be fixed later.
+ const std::string sampler =
+ GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
+ switch (instr.txq.query_type) {
+ case Tegra::Shader::TextureQueryType::Dimension: {
+ const std::string texture = "textureQueryLevels(" + sampler + ')';
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, texture, 1, 1);
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled texture query type: {}",
+ static_cast<u32>(instr.txq.query_type.Value()));
+ UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case OpCode::Id::TMML: {
+ ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
+ "NODEP is not implemented");
+ ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+ "NDV is not implemented");
+
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const bool is_array = instr.tmml.array != 0;
+ auto texture_type = instr.tmml.texture_type.Value();
+ const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
+
+ // TODO: add coordinates for different samplers once other texture types are
+ // implemented.
+ std::string coord;
+ switch (texture_type) {
+ case Tegra::Shader::TextureType::Texture1D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ coord = "float coords = " + x + ';';
+ break;
+ }
+ case Tegra::Shader::TextureType::Texture2D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ default:
+ LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+ static_cast<u32>(texture_type));
+ UNREACHABLE();
+
+ // Fallback to interpreting as a 2D texture for now
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ texture_type = Tegra::Shader::TextureType::Texture2D;
+ }
+ // Add an extra scope and declare the texture coords inside to prevent
+ // overwriting them in case they are used as outputs of this instruction.
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine(coord);
+ const std::string texture = "textureQueryLod(" + sampler + ", coords)";
+ const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
+ shader.AddLine(tmp);
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
+ regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
+ --shader.scope;
+ shader.AddLine('}');
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
UNREACHABLE();
@@ -1799,12 +2238,12 @@ private:
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.fsetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.fsetp.op);
- std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
+ const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.fsetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1818,7 +2257,8 @@ private:
break;
}
case OpCode::Type::IntegerSetPredicate: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
+ const std::string op_a =
+ regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
std::string op_b;
if (instr.is_b_imm) {
@@ -1835,12 +2275,12 @@ private:
// We can't use the constant predicate as destination.
ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.isetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.isetp.op);
- std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
+ const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
// Set the primary predicate to the result of Predicate OP SecondPredicate
SetPredicate(instr.isetp.pred3,
'(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1853,32 +2293,80 @@ private:
}
break;
}
+ case OpCode::Type::PredicateSetRegister: {
+ const std::string op_a =
+ GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+ const std::string op_b =
+ GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+
+ const std::string second_pred =
+ GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+ const std::string combiner = GetPredicateCombiner(instr.pset.op);
+
+ const std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
+ const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
+ if (instr.pset.bf == 0) {
+ const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
+ regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+ } else {
+ const std::string value = '(' + result + ") ? 1.0 : 0.0";
+ regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+ }
+
+ break;
+ }
case OpCode::Type::PredicateSetPredicate: {
- std::string op_a =
- GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
- std::string op_b =
- GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+ switch (opcode->GetId()) {
+ case OpCode::Id::PSETP: {
+ const std::string op_a =
+ GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+ const std::string op_b =
+ GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
- // We can't use the constant predicate as destination.
- ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+ // We can't use the constant predicate as destination.
+ ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- std::string second_pred =
- GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+ const std::string second_pred =
+ GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
- std::string combiner = GetPredicateCombiner(instr.psetp.op);
+ const std::string combiner = GetPredicateCombiner(instr.psetp.op);
- std::string predicate =
- '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
+ const std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
- // Set the primary predicate to the result of Predicate OP SecondPredicate
- SetPredicate(instr.psetp.pred3,
- '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+ // Set the primary predicate to the result of Predicate OP SecondPredicate
+ SetPredicate(instr.psetp.pred3,
+ '(' + predicate + ") " + combiner + " (" + second_pred + ')');
- if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
- // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
- // if enabled
- SetPredicate(instr.psetp.pred0,
- "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+ // if enabled
+ SetPredicate(instr.psetp.pred0,
+ "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ }
+ break;
+ }
+ case OpCode::Id::CSETP: {
+ const std::string pred =
+ GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
+ const std::string combiner = GetPredicateCombiner(instr.csetp.op);
+ const std::string controlCode = regs.GetControlCode(instr.csetp.cc);
+ if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
+ SetPredicate(instr.csetp.pred3,
+ '(' + controlCode + ") " + combiner + " (" + pred + ')');
+ }
+ if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ SetPredicate(instr.csetp.pred0,
+ "!(" + controlCode + ") " + combiner + " (" + pred + ')');
+ }
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
}
break;
}
@@ -1893,7 +2381,7 @@ private:
std::string op_b = instr.fset.neg_b ? "-" : "";
if (instr.is_b_imm) {
- std::string imm = GetImmediate19(instr);
+ const std::string imm = GetImmediate19(instr);
if (instr.fset.neg_imm)
op_b += "(-" + imm + ')';
else
@@ -1913,13 +2401,14 @@ private:
// The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
// condition is true, and to 0 otherwise.
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.fset.op);
+ const std::string combiner = GetPredicateCombiner(instr.fset.op);
- std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
- ") " + combiner + " (" + second_pred + "))";
+ const std::string predicate = "((" +
+ GetPredicateComparison(instr.fset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.fset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -1930,7 +2419,7 @@ private:
break;
}
case OpCode::Type::IntegerSet: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+ const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
std::string op_b;
@@ -1947,13 +2436,14 @@ private:
// The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
// condition is true, and to 0 otherwise.
- std::string second_pred =
+ const std::string second_pred =
GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
- std::string combiner = GetPredicateCombiner(instr.iset.op);
+ const std::string combiner = GetPredicateCombiner(instr.iset.op);
- std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
- ") " + combiner + " (" + second_pred + "))";
+ const std::string predicate = "((" +
+ GetPredicateComparison(instr.iset.cond, op_a, op_b) +
+ ") " + combiner + " (" + second_pred + "))";
if (instr.iset.bf) {
regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2103,45 +2593,22 @@ private:
case OpCode::Id::BRA: {
ASSERT_MSG(instr.bra.constant_buffer == 0,
"BRA with constant buffers are not implemented");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
break;
}
case OpCode::Id::IPA: {
const auto& attribute = instr.attribute.fmt28;
const auto& reg = instr.gpr0;
- switch (instr.ipa.mode) {
- case Tegra::Shader::IpaMode::Pass:
- if (stage == Maxwell3D::Regs::ShaderStage::Fragment &&
- attribute.index == Attribute::Index::Position) {
- switch (attribute.element) {
- case 0:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.x;");
- break;
- case 1:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.y;");
- break;
- case 2:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = gl_FragCoord.z;");
- break;
- case 3:
- shader.AddLine(regs.GetRegisterAsFloat(reg) + " = 1.0;");
- break;
- }
- } else {
- regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
- }
- break;
- case Tegra::Shader::IpaMode::None:
- regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
- break;
- default:
- LOG_CRITICAL(HW_GPU, "Unhandled IPA mode: {}",
- static_cast<u32>(instr.ipa.mode.Value()));
- UNREACHABLE();
- regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index);
- }
+ Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
+ instr.ipa.sample_mode.Value()};
+ regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index,
+ input_mode);
+
+ if (instr.ipa.saturate) {
+ regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true);
+ }
break;
}
case OpCode::Id::SSY: {
@@ -2150,7 +2617,7 @@ private:
// has a similar structure to the BRA opcode.
ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
- u32 target = offset + instr.bra.GetBranchTarget();
+ const u32 target = offset + instr.bra.GetBranchTarget();
EmitPushToSSYStack(target);
break;
}
@@ -2244,10 +2711,10 @@ private:
shader.AddLine("case " + std::to_string(label) + "u: {");
++shader.scope;
- auto next_it = labels.lower_bound(label + 1);
- u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+ const auto next_it = labels.lower_bound(label + 1);
+ const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
- u32 compile_end = CompileRange(label, next_label);
+ const u32 compile_end = CompileRange(label, next_label);
if (compile_end > next_label && compile_end != PROGRAM_END) {
// This happens only when there is a label inside a IF/LOOP block
shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
@@ -2289,6 +2756,7 @@ private:
private:
const std::set<Subroutine>& subroutines;
const ProgramCode& program_code;
+ Tegra::Shader::Header header;
const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage;
const std::string& suffix;
@@ -2310,7 +2778,8 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix) {
try {
- auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
+ const auto subroutines =
+ ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 6ca05945e..b0466c18f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -42,6 +42,7 @@ layout (std140) uniform vs_config {
};
void main() {
+ position = vec4(0.0, 0.0, 0.0, 0.0);
exec_vertex();
)";
@@ -87,7 +88,14 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
.get_value_or({});
out += R"(
in vec4 position;
-layout(location = 0) out vec4 color[8];
+layout(location = 0) out vec4 FragColor0;
+layout(location = 1) out vec4 FragColor1;
+layout(location = 2) out vec4 FragColor2;
+layout(location = 3) out vec4 FragColor3;
+layout(location = 4) out vec4 FragColor4;
+layout(location = 5) out vec4 FragColor5;
+layout(location = 6) out vec4 FragColor6;
+layout(location = 7) out vec4 FragColor7;
layout (std140) uniform fs_config {
vec4 viewport_flip;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index c788099d4..d53b93ad5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -9,10 +9,11 @@
#include <vector>
#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
namespace OpenGL::GLShader {
-constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
+constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000};
using ProgramCode = std::vector<u64>;
class ConstBufferEntry {
@@ -50,7 +51,11 @@ public:
}
std::string GetName() const {
- return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index);
+ return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index);
+ }
+
+ u32 GetHash() const {
+ return (static_cast<u32>(stage) << 16) | index;
}
private:
@@ -69,14 +74,15 @@ class SamplerEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
- SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
- : offset(offset), stage(stage), sampler_index(index) {}
+ SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index,
+ Tegra::Shader::TextureType type, bool is_array)
+ : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
- size_t GetOffset() const {
+ std::size_t GetOffset() const {
return offset;
}
- size_t GetIndex() const {
+ std::size_t GetIndex() const {
return sampler_index;
}
@@ -85,23 +91,63 @@ public:
}
std::string GetName() const {
- return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
- std::to_string(sampler_index) + ']';
+ return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' +
+ std::to_string(sampler_index);
+ }
+
+ std::string GetTypeString() const {
+ using Tegra::Shader::TextureType;
+ std::string glsl_type;
+
+ switch (type) {
+ case TextureType::Texture1D:
+ glsl_type = "sampler1D";
+ break;
+ case TextureType::Texture2D:
+ glsl_type = "sampler2D";
+ break;
+ case TextureType::Texture3D:
+ glsl_type = "sampler3D";
+ break;
+ case TextureType::TextureCube:
+ glsl_type = "samplerCube";
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ if (is_array)
+ glsl_type += "Array";
+ return glsl_type;
+ }
+
+ Tegra::Shader::TextureType GetType() const {
+ return type;
+ }
+
+ bool IsArray() const {
+ return is_array;
+ }
+
+ u32 GetHash() const {
+ return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index);
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {
- return TextureSamplerNames[static_cast<size_t>(stage)];
+ return TextureSamplerNames[static_cast<std::size_t>(stage)];
}
private:
static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
"tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
};
+
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling
/// instruction.
- size_t offset;
- Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
- size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
+ std::size_t offset;
+ Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
+ std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
+ Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
+ bool is_array; ///< Whether the texture is being sampled as an array texture or not.
};
struct ShaderEntries {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 533e42caa..b86cd96e8 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -12,7 +12,7 @@
namespace OpenGL::GLShader {
/// Number of OpenGL texture samplers that can be used in the fragment shader
-static constexpr size_t NumTextureSamplers = 32;
+static constexpr std::size_t NumTextureSamplers = 32;
using Tegra::Engines::Maxwell3D;
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5781d9d16..5f3fe067e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -25,7 +25,7 @@ GLuint LoadShader(const char* source, GLenum type) {
default:
UNREACHABLE();
}
- GLuint shader_id = glCreateShader(type);
+ const GLuint shader_id = glCreateShader(type);
glShaderSource(shader_id, 1, &source, nullptr);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 60a4defd1..af99132ba 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -200,9 +200,9 @@ void OpenGLState::Apply() const {
const auto& texture_unit = texture_units[i];
const auto& cur_state_texture_unit = cur_state.texture_units[i];
- if (texture_unit.texture_2d != cur_state_texture_unit.texture_2d) {
+ if (texture_unit.texture != cur_state_texture_unit.texture) {
glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
- glBindTexture(GL_TEXTURE_2D, texture_unit.texture_2d);
+ glBindTexture(texture_unit.target, texture_unit.texture);
}
if (texture_unit.sampler != cur_state_texture_unit.sampler) {
glBindSampler(static_cast<GLuint>(i), texture_unit.sampler);
@@ -214,7 +214,7 @@ void OpenGLState::Apply() const {
texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
texture_unit.swizzle.b, texture_unit.swizzle.a};
- glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
+ glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
}
}
@@ -272,7 +272,7 @@ void OpenGLState::Apply() const {
}
// Clip distance
- for (size_t i = 0; i < clip_distance.size(); ++i) {
+ for (std::size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) {
if (clip_distance[i]) {
glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
@@ -287,7 +287,7 @@ void OpenGLState::Apply() const {
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
for (auto& unit : texture_units) {
- if (unit.texture_2d == handle) {
+ if (unit.texture == handle) {
unit.Unbind();
}
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 46e96a97d..e3e24b9e7 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -94,8 +94,9 @@ public:
// 3 texture units - one for each that is used in PICA fragment shader emulation
struct TextureUnit {
- GLuint texture_2d; // GL_TEXTURE_BINDING_2D
- GLuint sampler; // GL_SAMPLER_BINDING
+ GLuint texture; // GL_TEXTURE_BINDING_2D
+ GLuint sampler; // GL_SAMPLER_BINDING
+ GLenum target;
struct {
GLint r; // GL_TEXTURE_SWIZZLE_R
GLint g; // GL_TEXTURE_SWIZZLE_G
@@ -104,7 +105,7 @@ public:
} swizzle;
void Unbind() {
- texture_2d = 0;
+ texture = 0;
swizzle.r = GL_RED;
swizzle.g = GL_GREEN;
swizzle.b = GL_BLUE;
@@ -114,6 +115,7 @@ public:
void Reset() {
Unbind();
sampler = 0;
+ target = GL_TEXTURE_2D;
}
};
std::array<TextureUnit, 32> texture_units;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e565afcee..664f3ca20 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -29,7 +29,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coh
if (GLAD_GL_ARB_buffer_storage) {
persistent = true;
coherent = prefer_coherent;
- GLbitfield flags =
+ const GLbitfield flags =
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glBufferStorage(gl_target, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapBufferRange(
@@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
mapped_size = size;
if (alignment > 0) {
- buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
+ buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}
bool invalidate = false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 411a73d50..96d916b07 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -177,7 +177,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
Memory::GetPointer(framebuffer_addr),
gl_framebuffer_data.data(), true);
- state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+ state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -194,7 +194,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
}
@@ -205,7 +205,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
*/
void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture) {
- state.texture_units[0].texture_2d = texture.resource.handle;
+ state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -214,7 +214,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
// Update existing texture
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
@@ -260,7 +260,7 @@ void RendererOpenGL::InitOpenGLObjects() {
// Allocation of storage is deferred until the first frame, when we
// know the framebuffer size.
- state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+ state.texture_units[0].texture = screen_info.texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
@@ -272,7 +272,7 @@ void RendererOpenGL::InitOpenGLObjects() {
screen_info.display_texture = screen_info.texture.resource.handle;
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
// Clear screen to black
@@ -305,14 +305,14 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
UNREACHABLE();
}
- state.texture_units[0].texture_2d = texture.resource.handle;
+ state.texture_units[0].texture = texture.resource.handle;
state.Apply();
glActiveTexture(GL_TEXTURE0);
glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
texture.gl_format, texture.gl_type, nullptr);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
@@ -354,14 +354,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
}};
- state.texture_units[0].texture_2d = screen_info.display_texture;
+ state.texture_units[0].texture = screen_info.display_texture;
state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
state.Apply();
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- state.texture_units[0].texture_2d = 0;
+ state.texture_units[0].texture = 0;
state.Apply();
}
@@ -369,6 +369,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
* Draws the emulated screens to the emulator window.
*/
void RendererOpenGL::DrawScreen() {
+ if (renderer_settings.set_background_color) {
+ // Update background color before drawing
+ glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
+ 0.0f);
+ }
+
const auto& layout = render_window.GetFramebufferLayout();
const auto& screen = layout.screen;
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 272294c62..20ba6d4f6 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -46,6 +46,48 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
}
}
+template <std::size_t N, std::size_t M>
+struct alignas(64) SwizzleTable {
+ constexpr SwizzleTable() {
+ for (u32 y = 0; y < N; ++y) {
+ for (u32 x = 0; x < M; ++x) {
+ const u32 x2 = x * 16;
+ values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
+ ((x2 % 32) / 16) * 32 + (y % 2) * 16);
+ }
+ }
+ }
+ const std::array<u16, M>& operator[](std::size_t index) const {
+ return values[index];
+ }
+ std::array<std::array<u16, M>, N> values{};
+};
+
+constexpr auto swizzle_table = SwizzleTable<8, 4>();
+
+void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data,
+ u8* unswizzled_data, bool unswizzle, u32 block_height) {
+ std::array<u8*, 2> data_ptrs;
+ const std::size_t stride{width * bytes_per_pixel};
+ const std::size_t image_width_in_gobs{(stride + 63) / 64};
+ const std::size_t copy_size{16};
+ for (std::size_t y = 0; y < height; ++y) {
+ const std::size_t initial_gob =
+ (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs +
+ (y % (8 * block_height) / 8) * 512;
+ const std::size_t pixel_base{y * width * bytes_per_pixel};
+ const auto& table = swizzle_table[y % 8];
+ for (std::size_t xb = 0; xb < stride; xb += copy_size) {
+ const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height};
+ const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]};
+ const std::size_t pixel_index{xb + pixel_base};
+ data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
+ data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
+ std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
+ }
+ }
+}
+
u32 BytesPerPixel(TextureFormat format) {
switch (format) {
case TextureFormat::DXT1:
@@ -63,6 +105,7 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::R32_G32_B32:
return 12;
case TextureFormat::ASTC_2D_4X4:
+ case TextureFormat::ASTC_2D_8X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::BF10GF11RF11:
@@ -91,8 +134,13 @@ u32 BytesPerPixel(TextureFormat format) {
std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
u32 height, u32 block_height) {
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
- CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
- Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+ if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
+ FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel,
+ Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+ } else {
+ CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel,
+ Memory::GetPointer(address), unswizzled_data.data(), true, block_height);
+ }
return unswizzled_data;
}
@@ -111,6 +159,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::BC6H_UF16:
case TextureFormat::BC6H_SF16:
case TextureFormat::ASTC_2D_4X4:
+ case TextureFormat::ASTC_2D_8X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5:
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c6bd2f4b9..c2fb824b2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -170,8 +170,12 @@ struct TICEntry {
BitField<0, 16, u32> width_minus_1;
BitField<23, 4, TextureType> texture_type;
};
- u16 height_minus_1;
- INSERT_PADDING_BYTES(10);
+ union {
+ BitField<0, 16, u32> height_minus_1;
+ BitField<16, 15, u32> depth_minus_1;
+ };
+
+ INSERT_PADDING_BYTES(8);
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
@@ -192,6 +196,10 @@ struct TICEntry {
return height_minus_1 + 1;
}
+ u32 Depth() const {
+ return depth_minus_1 + 1;
+ }
+
u32 BlockHeight() const {
ASSERT(header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey);