summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/command_processor.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp94
-rw-r--r--src/video_core/engines/maxwell_3d.h95
-rw-r--r--src/video_core/engines/shader_bytecode.h47
-rw-r--r--src/video_core/engines/shader_header.h5
-rw-r--r--src/video_core/macro_interpreter.cpp25
-rw-r--r--src/video_core/macro_interpreter.h17
-rw-r--r--src/video_core/memory_manager.cpp77
-rw-r--r--src/video_core/memory_manager.h13
-rw-r--r--src/video_core/rasterizer_cache.cpp7
-rw-r--r--src/video_core/rasterizer_cache.h44
-rw-r--r--src/video_core/renderer_base.cpp1
-rw-r--r--src/video_core/renderer_base.h6
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp272
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h28
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp615
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h774
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp186
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h132
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp453
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp437
-rw-r--r--src/video_core/renderer_opengl/gl_state.h84
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp5
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h93
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp25
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
-rw-r--r--src/video_core/renderer_opengl/utils.cpp38
-rw-r--r--src/video_core/renderer_opengl/utils.h15
-rw-r--r--src/video_core/surface.cpp490
-rw-r--r--src/video_core/surface.h477
-rw-r--r--src/video_core/textures/astc.cpp32
-rw-r--r--src/video_core/textures/astc.h2
-rw-r--r--src/video_core/textures/decoders.cpp60
-rw-r--r--src/video_core/textures/decoders.h17
-rw-r--r--src/video_core/textures/texture.h18
-rw-r--r--src/video_core/utils.h26
45 files changed, 3148 insertions, 1644 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 09ecc5bad..a780215c1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(video_core STATIC
macro_interpreter.h
memory_manager.cpp
memory_manager.h
+ rasterizer_cache.cpp
rasterizer_cache.h
rasterizer_interface.h
renderer_base.cpp
@@ -33,6 +34,7 @@ add_library(video_core STATIC
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp
renderer_opengl/gl_rasterizer_cache.h
+ renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
@@ -51,6 +53,10 @@ add_library(video_core STATIC
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
+ renderer_opengl/utils.cpp
+ renderer_opengl/utils.h
+ surface.cpp
+ surface.h
textures/astc.cpp
textures/astc.h
textures/decoders.cpp
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index f1aa6091b..28e8c13aa 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -81,7 +81,7 @@ void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
for (auto entry : commands) {
Tegra::GPUVAddr address = entry.Address();
u32 size = entry.sz;
- const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+ const std::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
VAddr current_addr = *head_address;
while (current_addr < *head_address + size * sizeof(CommandHeader)) {
const CommandHeader header = {Memory::Read32(current_addr)};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index bca014a4a..6de07ea56 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cinttypes>
+#include <cstring>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
@@ -19,21 +20,69 @@ namespace Tegra::Engines {
constexpr u32 MacroRegistersStart = 0xE00;
Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
- : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {}
+ : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {
+ InitializeRegisterDefaults();
+}
+
+void Maxwell3D::InitializeRegisterDefaults() {
+ // Initializes registers to their default values - what games expect them to be at boot. This is
+ // for certain registers that may not be explicitly set by games.
+
+ // Reset all registers to zero
+ std::memset(&regs, 0, sizeof(regs));
+
+ // Depth range near/far is not always set, but is expected to be the default 0.0f, 1.0f. This is
+ // needed for ARMS.
+ for (std::size_t viewport{}; viewport < Regs::NumViewports; ++viewport) {
+ regs.viewport[viewport].depth_range_near = 0.0f;
+ regs.viewport[viewport].depth_range_far = 1.0f;
+ }
+ // Doom and Bomberman seems to use the uninitialized registers and just enable blend
+ // so initialize blend registers with sane values
+ regs.blend.equation_rgb = Regs::Blend::Equation::Add;
+ regs.blend.factor_source_rgb = Regs::Blend::Factor::One;
+ regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero;
+ regs.blend.equation_a = Regs::Blend::Equation::Add;
+ regs.blend.factor_source_a = Regs::Blend::Factor::One;
+ regs.blend.factor_dest_a = Regs::Blend::Factor::Zero;
+ for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) {
+ regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add;
+ regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One;
+ regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero;
+ regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add;
+ regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One;
+ regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero;
+ }
+ regs.stencil_front_op_fail = Regs::StencilOp::Keep;
+ regs.stencil_front_op_zfail = Regs::StencilOp::Keep;
+ regs.stencil_front_op_zpass = Regs::StencilOp::Keep;
+ regs.stencil_front_func_func = Regs::ComparisonOp::Always;
+ regs.stencil_front_func_mask = 0xFFFFFFFF;
+ regs.stencil_front_mask = 0xFFFFFFFF;
+ regs.stencil_two_side_enable = 1;
+ regs.stencil_back_op_fail = Regs::StencilOp::Keep;
+ regs.stencil_back_op_zfail = Regs::StencilOp::Keep;
+ regs.stencil_back_op_zpass = Regs::StencilOp::Keep;
+ regs.stencil_back_func_func = Regs::ComparisonOp::Always;
+ regs.stencil_back_func_mask = 0xFFFFFFFF;
+ regs.stencil_back_mask = 0xFFFFFFFF;
+}
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
// Reset the current macro.
executing_macro = 0;
- // The requested macro must have been uploaded already.
- auto macro_code = uploaded_macros.find(method);
- if (macro_code == uploaded_macros.end()) {
- LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
+ // Lookup the macro offset
+ const u32 entry{(method - MacroRegistersStart) >> 1};
+ const auto& search{macro_offsets.find(entry)};
+ if (search == macro_offsets.end()) {
+ LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
+ UNREACHABLE();
return;
}
// Execute the current macro.
- macro_interpreter.Execute(macro_code->second, std::move(parameters));
+ macro_interpreter.Execute(search->second, std::move(parameters));
}
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
@@ -72,13 +121,23 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
}
- regs.reg_array[method] = value;
+ if (regs.reg_array[method] != value) {
+ regs.reg_array[method] = value;
+ if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+ method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+ dirty_flags.vertex_attrib_format = true;
+ }
+ }
switch (method) {
case MAXWELL3D_REG_INDEX(macros.data): {
ProcessMacroUpload(value);
break;
}
+ case MAXWELL3D_REG_INDEX(macros.bind): {
+ ProcessMacroBind(value);
+ break;
+ }
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -140,22 +199,25 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
}
void Maxwell3D::ProcessMacroUpload(u32 data) {
- // Store the uploaded macro code to interpret them when they're called.
- auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
- macro.push_back(data);
+ ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
+ "upload_address exceeded macro_memory size!");
+ macro_memory[regs.macros.upload_address++] = data;
+}
+
+void Maxwell3D::ProcessMacroBind(u32 data) {
+ macro_offsets[regs.macros.entry] = data;
}
void Maxwell3D::ProcessQueryGet() {
GPUVAddr sequence_address = regs.query.QueryAddress();
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
// VAddr before writing.
- boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
+ std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
- u32 value = Memory::Read32(*address);
u64 result = 0;
// TODO(Subv): Support the other query variables
@@ -268,7 +330,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
// Don't allow writing past the end of the buffer.
ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
- boost::optional<VAddr> address =
+ std::optional<VAddr> address =
memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
Memory::Write32(*address, value);
@@ -281,7 +343,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
GPUVAddr tic_base_address = regs.tic.TICAddress();
GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
- boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
+ std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
Texture::TICEntry tic_entry;
Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -305,7 +367,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
- boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
+ std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
Texture::TSCEntry tsc_entry;
Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -369,7 +431,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
- boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+ std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
Texture::FullTextureInfo tex_info{};
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0e09a7ee5..91ca57883 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -345,6 +345,14 @@ public:
Invert = 6,
IncrWrap = 7,
DecrWrap = 8,
+ KeepOGL = 0x1E00,
+ ZeroOGL = 0,
+ ReplaceOGL = 0x1E01,
+ IncrOGL = 0x1E02,
+ DecrOGL = 0x1E03,
+ InvertOGL = 0x150A,
+ IncrWrapOGL = 0x8507,
+ DecrWrapOGL = 0x8508,
};
enum class MemoryLayout : u32 {
@@ -462,6 +470,16 @@ public:
}
};
+ struct ColorMask {
+ union {
+ u32 raw;
+ BitField<0, 4, u32> R;
+ BitField<4, 4, u32> G;
+ BitField<8, 4, u32> B;
+ BitField<12, 4, u32> A;
+ };
+ };
+
bool IsShaderConfigEnabled(std::size_t index) const {
// The VertexB is always enabled.
if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -475,12 +493,13 @@ public:
INSERT_PADDING_WORDS(0x45);
struct {
- INSERT_PADDING_WORDS(1);
+ u32 upload_address;
u32 data;
u32 entry;
+ u32 bind;
} macros;
- INSERT_PADDING_WORDS(0x189);
+ INSERT_PADDING_WORDS(0x188);
u32 tfb_enabled;
@@ -570,7 +589,11 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_PADDING_WORDS(0x13);
+ INSERT_PADDING_WORDS(0xC);
+
+ u32 color_mask_common;
+
+ INSERT_PADDING_WORDS(0x6);
u32 rt_separate_frag_data;
@@ -645,8 +668,14 @@ public:
ComparisonOp depth_test_func;
float alpha_test_ref;
ComparisonOp alpha_test_func;
-
- INSERT_PADDING_WORDS(0x9);
+ u32 draw_tfb_stride;
+ struct {
+ float r;
+ float g;
+ float b;
+ float a;
+ } blend_color;
+ INSERT_PADDING_WORDS(0x4);
struct {
u32 separate_alpha;
@@ -723,7 +752,11 @@ public:
StencilOp stencil_back_op_zpass;
ComparisonOp stencil_back_func_func;
- INSERT_PADDING_WORDS(0x17);
+ INSERT_PADDING_WORDS(0x4);
+
+ u32 framebuffer_srgb;
+
+ INSERT_PADDING_WORDS(0x12);
union {
BitField<2, 1, u32> coord_origin;
@@ -751,7 +784,14 @@ public:
};
} draw;
- INSERT_PADDING_WORDS(0x6B);
+ INSERT_PADDING_WORDS(0xA);
+
+ struct {
+ u32 enabled;
+ u32 index;
+ } primitive_restart;
+
+ INSERT_PADDING_WORDS(0x5F);
struct {
u32 start_addr_high;
@@ -829,8 +869,9 @@ public:
BitField<6, 4, u32> RT;
BitField<10, 11, u32> layer;
} clear_buffers;
-
- INSERT_PADDING_WORDS(0x4B);
+ INSERT_PADDING_WORDS(0xB);
+ std::array<ColorMask, NumRenderTargets> color_mask;
+ INSERT_PADDING_WORDS(0x38);
struct {
u32 query_address_high;
@@ -971,6 +1012,12 @@ public:
State state{};
MemoryManager& memory_manager;
+ struct DirtyFlags {
+ bool vertex_attrib_format = true;
+ };
+
+ DirtyFlags dirty_flags;
+
/// Reads a register value located at the input method address
u32 GetRegisterValue(u32 method) const;
@@ -983,10 +1030,25 @@ public:
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
+ /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
+ /// we've seen used.
+ using MacroMemory = std::array<u32, 0x40000>;
+
+ /// Gets a reference to macro memory.
+ const MacroMemory& GetMacroMemory() const {
+ return macro_memory;
+ }
+
private:
+ void InitializeRegisterDefaults();
+
VideoCore::RasterizerInterface& rasterizer;
- std::unordered_map<u32, std::vector<u32>> uploaded_macros;
+ /// Start offsets of each macro in macro_memory
+ std::unordered_map<u32, u32> macro_offsets;
+
+ /// Memory for macro code
+ MacroMemory macro_memory;
/// Macro method that is currently being executed / being fed parameters.
u32 executing_macro = 0;
@@ -1009,9 +1071,12 @@ private:
*/
void CallMacroMethod(u32 method, std::vector<u32> parameters);
- /// Handles writes to the macro uploading registers.
+ /// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data);
+ /// Handles writes to the macro bind register.
+ void ProcessMacroBind(u32 data);
+
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers();
@@ -1045,6 +1110,7 @@ ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1057,6 +1123,10 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB);
ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
+ASSERT_REG_POSITION(alpha_test_ref, 0x4C4);
+ASSERT_REG_POSITION(alpha_test_func, 0x4C5);
+ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6);
+ASSERT_REG_POSITION(blend_color, 0x4C7);
ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(stencil_enable, 0x4E0);
ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1);
@@ -1077,14 +1147,17 @@ ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
ASSERT_REG_POSITION(stencil_back_op_zfail, 0x567);
ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
+ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(logic_op, 0x671);
ASSERT_REG_POSITION(clear_buffers, 0x674);
+ASSERT_REG_POSITION(color_mask, 0x680);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6cd08d28b..83a6fd875 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -5,12 +5,11 @@
#pragma once
#include <bitset>
+#include <optional>
#include <string>
#include <tuple>
#include <vector>
-#include <boost/optional.hpp>
-
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -79,6 +78,7 @@ union Attribute {
constexpr explicit Attribute(u64 value) : value(value) {}
enum class Index : u64 {
+ PointSize = 6,
Position = 7,
Attribute_0 = 8,
Attribute_31 = 39,
@@ -207,6 +207,16 @@ enum class UniformType : u64 {
Double = 5,
};
+enum class StoreType : u64 {
+ Unsigned8 = 0,
+ Signed8 = 1,
+ Unsigned16 = 2,
+ Signed16 = 3,
+ Bytes32 = 4,
+ Bytes64 = 5,
+ Bytes128 = 6,
+};
+
enum class IMinMaxExchange : u64 {
None = 0,
XLo = 1,
@@ -568,6 +578,10 @@ union Instruction {
} fmul32;
union {
+ BitField<52, 1, u64> generates_cc;
+ } op_32;
+
+ union {
BitField<48, 1, u64> is_signed;
} shift;
@@ -747,6 +761,18 @@ union Instruction {
} ld_c;
union {
+ BitField<48, 3, StoreType> type;
+ } ldst_sl;
+
+ union {
+ BitField<44, 2, u64> unknown;
+ } ld_l;
+
+ union {
+ BitField<44, 2, u64> unknown;
+ } st_l;
+
+ union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<7, 1, u64> abs_a;
@@ -1208,6 +1234,8 @@ union Instruction {
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
+ BitField<20, 24, s64> smem_imm;
+ BitField<0, 5, ControlCode> flow_control_code;
Attribute attribute;
Sampler sampler;
@@ -1231,8 +1259,12 @@ public:
BRA,
PBK,
LD_A,
+ LD_L,
+ LD_S,
LD_C,
ST_A,
+ ST_L,
+ ST_S,
LDG, // Load from global memory
STG, // Store in global memory
TEX,
@@ -1428,7 +1460,7 @@ public:
Type type;
};
- static boost::optional<const Matcher&> Decode(Instruction instr) {
+ static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
static const auto table{GetDecodeTable()};
const auto matches_instruction = [instr](const auto& matcher) {
@@ -1436,7 +1468,8 @@ public:
};
auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
- return iter != table.end() ? boost::optional<const Matcher&>(*iter) : boost::none;
+ return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
+ : std::nullopt;
}
private:
@@ -1489,8 +1522,12 @@ private:
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+ INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
+ INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+ INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
+ INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
@@ -1626,4 +1663,4 @@ private:
}
};
-} // namespace Tegra::Shader \ No newline at end of file
+} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index a885ee3cf..a0e015c4b 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -96,6 +96,11 @@ struct Header {
}
} ps;
};
+
+ u64 GetLocalMemorySize() {
+ return (common1.shader_local_memory_low_size |
+ (common2.shader_local_memory_high_size << 24));
+ }
};
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 377bd66ab..335a8d407 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -11,7 +11,7 @@ namespace Tegra {
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
-void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
+void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
Reset();
registers[1] = parameters[0];
this->parameters = std::move(parameters);
@@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
// Execute the code until we hit an exit condition.
bool keep_executing = true;
while (keep_executing) {
- keep_executing = Step(code, false);
+ keep_executing = Step(offset, false);
}
// Assert the the macro used all the input parameters
@@ -29,7 +29,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
void MacroInterpreter::Reset() {
registers = {};
pc = 0;
- delayed_pc = boost::none;
+ delayed_pc = {};
method_address.raw = 0;
parameters.clear();
// The next parameter index starts at 1, because $r1 already has the value of the first
@@ -37,17 +37,17 @@ void MacroInterpreter::Reset() {
next_parameter_index = 1;
}
-bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
+bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
u32 base_address = pc;
- Opcode opcode = GetOpcode(code);
+ Opcode opcode = GetOpcode(offset);
pc += 4;
// Update the program counter if we were delayed
- if (delayed_pc != boost::none) {
+ if (delayed_pc) {
ASSERT(is_delay_slot);
pc = *delayed_pc;
- delayed_pc = boost::none;
+ delayed_pc = {};
}
switch (opcode.operation) {
@@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
delayed_pc = base_address + opcode.GetBranchTarget();
// Execute one more instruction due to the delay slot.
- return Step(code, true);
+ return Step(offset, true);
}
break;
}
@@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
// Exit has a delay slot, execute the next instruction
// Note: Executing an exit during a branch delay slot will cause the instruction at the
// branch target to be executed before exiting.
- Step(code, true);
+ Step(offset, true);
return false;
}
return true;
}
-MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
+MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
+ const auto& macro_memory{maxwell3d.GetMacroMemory()};
ASSERT((pc % sizeof(u32)) == 0);
- ASSERT(pc < code.size() * sizeof(u32));
- return {code[pc / sizeof(u32)]};
+ ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
+ return {macro_memory[offset + pc / sizeof(u32)]};
}
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index cee0baaf3..62d1ce289 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -5,8 +5,9 @@
#pragma once
#include <array>
+#include <optional>
#include <vector>
-#include <boost/optional.hpp>
+
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -21,10 +22,10 @@ public:
/**
* Executes the macro code with the specified input parameters.
- * @param code The macro byte code to execute
- * @param parameters The parameters of the macro
+ * @param offset Offset to start execution at.
+ * @param parameters The parameters of the macro.
*/
- void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
+ void Execute(u32 offset, std::vector<u32> parameters);
private:
enum class Operation : u32 {
@@ -109,11 +110,11 @@ private:
/**
* Executes a single macro instruction located at the current program counter. Returns whether
* the interpreter should keep running.
- * @param code The macro code to execute.
+ * @param offset Offset to start execution at.
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
* previous instruction.
*/
- bool Step(const std::vector<u32>& code, bool is_delay_slot);
+ bool Step(u32 offset, bool is_delay_slot);
/// Calculates the result of an ALU operation. src_a OP src_b;
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
@@ -126,7 +127,7 @@ private:
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
/// Reads an opcode at the current program counter location.
- Opcode GetOpcode(const std::vector<u32>& code) const;
+ Opcode GetOpcode(u32 offset) const;
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
u32 GetRegister(u32 register_id) const;
@@ -149,7 +150,7 @@ private:
Engines::Maxwell3D& maxwell3d;
u32 pc; ///< Current program counter
- boost::optional<u32>
+ std::optional<u32>
delayed_pc; ///< Program counter to execute at after the delay slot is executed.
static constexpr std::size_t NumMacroRegisters = 8;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 022d4ab74..77a20bb84 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -4,18 +4,21 @@
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/logging/log.h"
#include "video_core/memory_manager.h"
namespace Tegra {
GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
- boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align);
- ASSERT(gpu_addr);
+ const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, align, PageStatus::Unmapped)};
- for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
- VAddr& slot = PageSlot(*gpu_addr + offset);
+ ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
+
+ for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+ VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+
slot = static_cast<u64>(PageStatus::Allocated);
}
@@ -23,10 +26,11 @@ GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
}
GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
- for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
- VAddr& slot = PageSlot(gpu_addr + offset);
+ for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+ VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+
slot = static_cast<u64>(PageStatus::Allocated);
}
@@ -34,17 +38,19 @@ GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
}
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
- boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE);
- ASSERT(gpu_addr);
+ const std::optional<GPUVAddr> gpu_addr{FindFreeBlock(0, size, PAGE_SIZE, PageStatus::Unmapped)};
+
+ ASSERT_MSG(gpu_addr, "unable to find available GPU memory");
- for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
- VAddr& slot = PageSlot(*gpu_addr + offset);
+ for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+ VAddr& slot{PageSlot(*gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Unmapped));
+
slot = cpu_addr + offset;
}
- MappedRegion region{cpu_addr, *gpu_addr, size};
+ const MappedRegion region{cpu_addr, *gpu_addr, size};
mapped_regions.push_back(region);
return *gpu_addr;
@@ -53,14 +59,31 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
- for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
- VAddr& slot = PageSlot(gpu_addr + offset);
+ if (PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Allocated)) {
+ // Page has been already mapped. In this case, we must find a new area of memory to use that
+ // is different than the specified one. Super Mario Odyssey hits this scenario when changing
+ // areas, but we do not want to overwrite the old pages.
+ // TODO(bunnei): We need to write a hardware test to confirm this behavior.
+
+ LOG_ERROR(HW_GPU, "attempting to map addr 0x{:016X}, which is not available!", gpu_addr);
+
+ const std::optional<GPUVAddr> new_gpu_addr{
+ FindFreeBlock(gpu_addr, size, PAGE_SIZE, PageStatus::Allocated)};
+
+ ASSERT_MSG(new_gpu_addr, "unable to find available GPU memory");
+
+ gpu_addr = *new_gpu_addr;
+ }
+
+ for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+ VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot == static_cast<u64>(PageStatus::Allocated));
+
slot = cpu_addr + offset;
}
- MappedRegion region{cpu_addr, gpu_addr, size};
+ const MappedRegion region{cpu_addr, gpu_addr, size};
mapped_regions.push_back(region);
return gpu_addr;
@@ -69,11 +92,12 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
ASSERT((gpu_addr & PAGE_MASK) == 0);
- for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
- VAddr& slot = PageSlot(gpu_addr + offset);
+ for (u64 offset{}; offset < size; offset += PAGE_SIZE) {
+ VAddr& slot{PageSlot(gpu_addr + offset)};
ASSERT(slot != static_cast<u64>(PageStatus::Allocated) &&
slot != static_cast<u64>(PageStatus::Unmapped));
+
slot = static_cast<u64>(PageStatus::Unmapped);
}
@@ -97,13 +121,14 @@ GPUVAddr MemoryManager::GetRegionEnd(GPUVAddr region_start) const {
return {};
}
-boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
- GPUVAddr gpu_addr = 0;
- u64 free_space = 0;
+std::optional<GPUVAddr> MemoryManager::FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
+ PageStatus status) {
+ GPUVAddr gpu_addr{region_start};
+ u64 free_space{};
align = (align + PAGE_MASK) & ~PAGE_MASK;
while (gpu_addr + free_space < MAX_ADDRESS) {
- if (!IsPageMapped(gpu_addr + free_space)) {
+ if (PageSlot(gpu_addr + free_space) == static_cast<u64>(status)) {
free_space += PAGE_SIZE;
if (free_space >= size) {
return gpu_addr;
@@ -118,8 +143,8 @@ boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
return {};
}
-boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
- VAddr base_addr = PageSlot(gpu_addr);
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
+ const VAddr base_addr{PageSlot(gpu_addr)};
if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
base_addr == static_cast<u64>(PageStatus::Unmapped)) {
@@ -133,19 +158,15 @@ std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
std::vector<GPUVAddr> results;
for (const auto& region : mapped_regions) {
if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
- u64 offset = cpu_addr - region.cpu_addr;
+ const u64 offset{cpu_addr - region.cpu_addr};
results.push_back(region.gpu_addr + offset);
}
}
return results;
}
-bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) {
- return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped);
-}
-
VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
- auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
+ auto& block{page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK]};
if (!block) {
block = std::make_unique<PageBlock>();
block->fill(static_cast<VAddr>(PageStatus::Unmapped));
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index caf80093f..4eb338aa2 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -6,10 +6,9 @@
#include <array>
#include <memory>
+#include <optional>
#include <vector>
-#include <boost/optional.hpp>
-
#include "common/common_types.h"
namespace Tegra {
@@ -27,7 +26,7 @@ public:
GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
- boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
+ std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
static constexpr u64 PAGE_BITS = 16;
@@ -35,15 +34,15 @@ public:
static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
private:
- boost::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1);
- bool IsPageMapped(GPUVAddr gpu_addr);
- VAddr& PageSlot(GPUVAddr gpu_addr);
-
enum class PageStatus : u64 {
Unmapped = 0xFFFFFFFFFFFFFFFFULL,
Allocated = 0xFFFFFFFFFFFFFFFEULL,
};
+ std::optional<GPUVAddr> FindFreeBlock(GPUVAddr region_start, u64 size, u64 align,
+ PageStatus status);
+ VAddr& PageSlot(GPUVAddr gpu_addr);
+
static constexpr u64 MAX_ADDRESS{0x10000000000ULL};
static constexpr u64 PAGE_TABLE_BITS{10};
static constexpr u64 PAGE_TABLE_SIZE{1 << PAGE_TABLE_BITS};
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
new file mode 100644
index 000000000..093b2cdf4
--- /dev/null
+++ b/src/video_core/rasterizer_cache.cpp
@@ -0,0 +1,7 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/rasterizer_cache.h"
+
+RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 0a3b3951e..bcf0c15a4 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -5,18 +5,19 @@
#pragma once
#include <set>
+#include <unordered_map>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range_core.hpp>
#include "common/common_types.h"
-#include "core/core.h"
#include "core/settings.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
class RasterizerCacheObject {
public:
+ virtual ~RasterizerCacheObject();
+
/// Gets the address of the shader in guest memory, required for cache management
virtual VAddr GetAddr() const = 0;
@@ -64,6 +65,8 @@ class RasterizerCache : NonCopyable {
friend class RasterizerCacheObject;
public:
+ explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+
/// Write any cached resources overlapping the specified region back to memory
void FlushRegion(Tegra::GPUVAddr addr, size_t size) {
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -86,45 +89,39 @@ public:
/// Invalidates everything in the cache
void InvalidateAll() {
- while (object_cache.begin() != object_cache.end()) {
- Unregister(*object_cache.begin()->second.begin());
+ while (interval_cache.begin() != interval_cache.end()) {
+ Unregister(*interval_cache.begin()->second.begin());
}
}
protected:
/// Tries to get an object from the cache with the specified address
T TryGet(VAddr addr) const {
- const ObjectInterval interval{addr};
- for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
- for (auto& cached_object : pair.second) {
- if (cached_object->GetAddr() == addr) {
- return cached_object;
- }
- }
- }
+ const auto iter = map_cache.find(addr);
+ if (iter != map_cache.end())
+ return iter->second;
return nullptr;
}
/// Register an object into the cache
void Register(const T& object) {
object->SetIsRegistered(true);
- object_cache.add({GetInterval(object), ObjectSet{object}});
- auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
+ interval_cache.add({GetInterval(object), ObjectSet{object}});
+ map_cache.insert({object->GetAddr(), object});
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
}
/// Unregisters an object from the cache
void Unregister(const T& object) {
object->SetIsRegistered(false);
- auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer();
rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
-
// Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
if (Settings::values.use_accurate_gpu_emulation) {
FlushObject(object);
}
- object_cache.subtract({GetInterval(object), ObjectSet{object}});
+ interval_cache.subtract({GetInterval(object), ObjectSet{object}});
+ map_cache.erase(object->GetAddr());
}
/// Returns a ticks counter used for tracking when cached objects were last modified
@@ -141,7 +138,7 @@ private:
std::vector<T> objects;
const ObjectInterval interval{addr, addr + size};
- for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) {
+ for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
for (auto& cached_object : pair.second) {
if (!cached_object) {
continue;
@@ -167,14 +164,17 @@ private:
}
using ObjectSet = std::set<T>;
- using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>;
- using ObjectInterval = typename ObjectCache::interval_type;
+ using ObjectCache = std::unordered_map<VAddr, T>;
+ using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
+ using ObjectInterval = typename IntervalCache::interval_type;
static auto GetInterval(const T& object) {
return ObjectInterval::right_open(object->GetAddr(),
object->GetAddr() + object->GetSizeInBytes());
}
- ObjectCache object_cache; ///< Cache of objects
- u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
+ ObjectCache map_cache;
+ IntervalCache interval_cache; ///< Cache of objects
+ u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
+ VideoCore::RasterizerInterface& rasterizer;
};
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 0df3725c2..1482cdb40 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -5,7 +5,6 @@
#include "core/frontend/emu_window.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace VideoCore {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2cd0738ff..669e26e15 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -6,7 +6,8 @@
#include <atomic>
#include <memory>
-#include <boost/optional.hpp>
+#include <optional>
+
#include "common/common_types.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
@@ -28,7 +29,8 @@ public:
virtual ~RendererBase();
/// Swap buffers (render frame)
- virtual void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) = 0;
+ virtual void SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
/// Initialize the renderer
virtual bool Init() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index c142095c5..075192c3f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -9,15 +9,17 @@
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
namespace OpenGL {
-OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {}
+OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
+ : RasterizerCache{rasterizer}, stream_buffer(GL_ARRAY_BUFFER, size) {}
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
std::size_t alignment, bool cache) {
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+ const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index be29dc8be..91fca3f6c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -15,6 +15,8 @@
namespace OpenGL {
+class RasterizerOpenGL;
+
struct CachedBufferEntry final : public RasterizerCacheObject {
VAddr GetAddr() const override {
return addr;
@@ -35,7 +37,7 @@ struct CachedBufferEntry final : public RasterizerCacheObject {
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
- explicit OGLBufferCache(std::size_t size);
+ explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index ee1d9601b..d9ed08437 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -6,6 +6,7 @@
#include <array>
#include "common/assert.h"
#include "common/common_types.h"
+#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
@@ -45,7 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+ const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
const u8* source{Memory::GetPointer(*cpu_addr)};
for (u32 primitive = 0; primitive < count / 4; ++primitive) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index be51c5215..84bd91eed 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,10 +30,11 @@
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using PixelFormat = SurfaceParams::PixelFormat;
-using SurfaceType = SurfaceParams::SurfaceType;
+using PixelFormat = VideoCore::Surface::PixelFormat;
+using SurfaceType = VideoCore::Surface::SurfaceType;
-MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192));
@@ -79,7 +80,8 @@ struct DrawParameters {
};
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
- : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
+ : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
+ buffer_cache(*this, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
@@ -104,9 +106,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
}
ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported");
-
- // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
- state.clip_distance[0] = true;
+ OpenGLState::ApplyDefaultState();
// Create render framebuffer
framebuffer.Create();
@@ -115,8 +115,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
state.draw.shader_program = 0;
state.Apply();
- glEnable(GL_BLEND);
-
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
@@ -124,18 +122,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
RasterizerOpenGL::~RasterizerOpenGL() {}
-void RasterizerOpenGL::SetupVertexArrays() {
- MICROPROFILE_SCOPE(OpenGL_VAO);
- const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+void RasterizerOpenGL::SetupVertexFormat() {
+ auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
+ if (!gpu.dirty_flags.vertex_attrib_format)
+ return;
+ gpu.dirty_flags.vertex_attrib_format = false;
+
+ MICROPROFILE_SCOPE(OpenGL_VAO);
+
auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
auto& VAO = iter->second;
if (is_cache_miss) {
VAO.Create();
state.draw.vertex_array = VAO.handle;
- state.Apply();
+ state.ApplyVertexBufferState();
// The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
// around.
@@ -177,8 +180,13 @@ void RasterizerOpenGL::SetupVertexArrays() {
}
}
state.draw.vertex_array = VAO.handle;
- state.draw.vertex_buffer = buffer_cache.GetHandle();
- state.Apply();
+ state.ApplyVertexBufferState();
+}
+
+void RasterizerOpenGL::SetupVertexBuffer() {
+ MICROPROFILE_SCOPE(OpenGL_VB);
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
// Upload all guest vertex arrays sequentially to our buffer
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -205,6 +213,9 @@ void RasterizerOpenGL::SetupVertexArrays() {
glVertexBindingDivisor(index, 0);
}
}
+
+ // Implicit set by glBindVertexBuffer. Stupid glstate handling...
+ state.draw.vertex_buffer = buffer_cache.GetHandle();
}
DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -329,8 +340,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
index++;
}
}
-
- state.Apply();
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -399,9 +408,9 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
cached_pages.add({pages_interval, delta});
}
-void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
- bool preserve_contents,
- boost::optional<std::size_t> single_color_target) {
+void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
+ bool using_depth_fb, bool preserve_contents,
+ std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
@@ -416,8 +425,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
// Bind the framebuffer surfaces
- state.draw.draw_framebuffer = framebuffer.handle;
- state.Apply();
+ current_state.draw.draw_framebuffer = framebuffer.handle;
+ current_state.ApplyFramebufferState();
+ current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
if (using_color_fb) {
if (single_color_target) {
@@ -429,6 +439,9 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
+ // Workaround for and issue in nvidia drivers
+ // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
+ state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
}
glFramebufferTexture2D(
@@ -446,6 +459,11 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
color_surface->MarkAsModified(true, res_cache);
+ // Enable sRGB only for supported formats
+ // Workaround for and issue in nvidia drivers
+ // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
+ state.framebuffer_srgb.enabled |=
+ color_surface->GetSurfaceParams().srgb_conversion;
}
buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
@@ -487,10 +505,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
}
-
- SyncViewport();
-
- state.Apply();
+ SyncViewport(current_state);
}
void RasterizerOpenGL::Clear() {
@@ -503,22 +518,23 @@ void RasterizerOpenGL::Clear() {
bool use_stencil{};
OpenGLState clear_state;
- clear_state.draw.draw_framebuffer = framebuffer.handle;
- clear_state.color_mask.red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
- clear_state.color_mask.green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
- clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
- clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
-
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
}
+ if (use_color) {
+ clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
+ clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
+ clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
+ clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
+ }
if (regs.clear_buffers.Z) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
use_depth = true;
// Always enable the depth write when clearing the depth buffer. The depth write mask is
- // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
+ // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to
+ // true.
clear_state.depth.test_enabled = true;
clear_state.depth.test_func = GL_ALWAYS;
}
@@ -535,9 +551,8 @@ void RasterizerOpenGL::Clear() {
ScopeAcquireGLContext acquire_context{emu_window};
- ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+ ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
regs.clear_buffers.RT.Value());
-
clear_state.Apply();
if (use_color) {
@@ -563,13 +578,14 @@ void RasterizerOpenGL::DrawArrays() {
ScopeAcquireGLContext acquire_context{emu_window};
- ConfigureFramebuffers();
-
+ ConfigureFramebuffers(state);
+ SyncColorMask();
SyncDepthTestState();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
+ SyncPrimitiveRestart();
SyncScissorTest();
// Alpha Testing is synced on shaders.
SyncTransformFeedback();
@@ -583,7 +599,7 @@ void RasterizerOpenGL::DrawArrays() {
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
state.draw.vertex_buffer = buffer_cache.GetHandle();
- state.Apply();
+ state.ApplyVertexBufferState();
std::size_t buffer_size = CalculateVertexArraysSize();
@@ -610,7 +626,8 @@ void RasterizerOpenGL::DrawArrays() {
buffer_cache.Map(buffer_size);
- SetupVertexArrays();
+ SetupVertexFormat();
+ SetupVertexBuffer();
DrawParameters params = SetupDraw();
SetupShaders(params.primitive_mode);
@@ -690,7 +707,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// Verify that the cached surface is the same size and format as the requested framebuffer
const auto& params{surface->GetSurfaceParams()};
- const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)};
+ const auto& pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
@@ -713,16 +731,20 @@ void RasterizerOpenGL::SamplerInfo::Create() {
glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
}
-void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
+void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::FullTextureInfo& info) {
const GLuint s = sampler.handle;
-
+ const Tegra::Texture::TSCEntry& config = info.tsc;
if (mag_filter != config.mag_filter) {
mag_filter = config.mag_filter;
- glSamplerParameteri(s, GL_TEXTURE_MAG_FILTER, MaxwellToGL::TextureFilterMode(mag_filter));
+ glSamplerParameteri(
+ s, GL_TEXTURE_MAG_FILTER,
+ MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
}
- if (min_filter != config.min_filter) {
+ if (min_filter != config.min_filter || mip_filter != config.mip_filter) {
min_filter = config.min_filter;
- glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, MaxwellToGL::TextureFilterMode(min_filter));
+ mip_filter = config.mip_filter;
+ glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER,
+ MaxwellToGL::TextureFilterMode(min_filter, mip_filter));
}
if (wrap_u != config.wrap_u) {
@@ -762,6 +784,22 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data());
}
}
+ if (info.tic.use_header_opt_control == 0) {
+ if (GLAD_GL_ARB_texture_filter_anisotropic) {
+ glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY,
+ static_cast<float>(1 << info.tic.max_anisotropy.Value()));
+ } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT,
+ static_cast<float>(1 << info.tic.max_anisotropy.Value()));
+ }
+ glSamplerParameterf(s, GL_TEXTURE_MIN_LOD,
+ static_cast<float>(info.tic.res_min_mip_level.Value()));
+ glSamplerParameterf(s, GL_TEXTURE_MAX_LOD,
+ static_cast<float>(info.tic.res_max_mip_level.Value() == 0
+ ? 16
+ : info.tic.res_max_mip_level.Value()));
+ glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, info.tic.mip_lod_bias.Value() / 256.f);
+ }
}
u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
@@ -859,7 +897,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
continue;
}
- texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
+ texture_samplers[current_bindpoint].SyncWithConfig(texture);
Surface surface = res_cache.GetTextureSurface(texture, entry);
if (surface != nullptr) {
state.texture_units[current_bindpoint].texture = surface->Texture().handle;
@@ -881,14 +919,18 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
return current_unit + static_cast<u32>(entries.size());
}
-void RasterizerOpenGL::SyncViewport() {
+void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
-
- state.viewport.x = viewport_rect.left;
- state.viewport.y = viewport_rect.bottom;
- state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
- state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
+ for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+ const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
+ auto& viewport = current_state.viewports[i];
+ viewport.x = viewport_rect.left;
+ viewport.y = viewport_rect.bottom;
+ viewport.width = static_cast<GLfloat>(viewport_rect.GetWidth());
+ viewport.height = static_cast<GLfloat>(viewport_rect.GetHeight());
+ viewport.depth_range_far = regs.viewport[i].depth_range_far;
+ viewport.depth_range_near = regs.viewport[i].depth_range_near;
+ }
}
void RasterizerOpenGL::SyncClipEnabled() {
@@ -923,12 +965,11 @@ void RasterizerOpenGL::SyncCullMode() {
}
}
-void RasterizerOpenGL::SyncDepthScale() {
- UNREACHABLE();
-}
+void RasterizerOpenGL::SyncPrimitiveRestart() {
+ const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-void RasterizerOpenGL::SyncDepthOffset() {
- UNREACHABLE();
+ state.primitive_restart.enabled = regs.primitive_restart.enabled;
+ state.primitive_restart.index = regs.primitive_restart.index;
}
void RasterizerOpenGL::SyncDepthTestState() {
@@ -951,9 +992,6 @@ void RasterizerOpenGL::SyncStencilTestState() {
return;
}
- // TODO(bunnei): Verify behavior when this is not set
- ASSERT(regs.stencil_two_side_enable);
-
state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
state.stencil.front.test_ref = regs.stencil_front_func_ref;
state.stencil.front.test_mask = regs.stencil_front_func_mask;
@@ -961,36 +999,79 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail);
state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass);
state.stencil.front.write_mask = regs.stencil_front_mask;
+ if (regs.stencil_two_side_enable) {
+ state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
+ state.stencil.back.test_ref = regs.stencil_back_func_ref;
+ state.stencil.back.test_mask = regs.stencil_back_func_mask;
+ state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
+ state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
+ state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
+ state.stencil.back.write_mask = regs.stencil_back_mask;
+ } else {
+ state.stencil.back.test_func = GL_ALWAYS;
+ state.stencil.back.test_ref = 0;
+ state.stencil.back.test_mask = 0xFFFFFFFF;
+ state.stencil.back.write_mask = 0xFFFFFFFF;
+ state.stencil.back.action_stencil_fail = GL_KEEP;
+ state.stencil.back.action_depth_fail = GL_KEEP;
+ state.stencil.back.action_depth_pass = GL_KEEP;
+ }
+}
- state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
- state.stencil.back.test_ref = regs.stencil_back_func_ref;
- state.stencil.back.test_mask = regs.stencil_back_func_mask;
- state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
- state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
- state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
- state.stencil.back.write_mask = regs.stencil_back_mask;
+void RasterizerOpenGL::SyncColorMask() {
+ const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+ const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
+ auto& dest = state.color_mask[i];
+ dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
+ dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
+ dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
+ dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
+ }
}
void RasterizerOpenGL::SyncBlendState() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- // TODO(Subv): Support more than just render target 0.
- state.blend.enabled = regs.blend.enable[0] != 0;
-
- if (!state.blend.enabled)
+ state.blend_color.red = regs.blend_color.r;
+ state.blend_color.green = regs.blend_color.g;
+ state.blend_color.blue = regs.blend_color.b;
+ state.blend_color.alpha = regs.blend_color.a;
+
+ state.independant_blend.enabled = regs.independent_blend_enable;
+ if (!state.independant_blend.enabled) {
+ auto& blend = state.blend[0];
+ blend.enabled = regs.blend.enable[0] != 0;
+ blend.separate_alpha = regs.blend.separate_alpha;
+ blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
+ blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
+ blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
+ if (blend.separate_alpha) {
+ blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
+ blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
+ blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
+ }
+ for (size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+ state.blend[i].enabled = false;
+ }
return;
+ }
- ASSERT_MSG(regs.logic_op.enable == 0,
- "Blending and logic op can't be enabled at the same time.");
-
- ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
- ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
- state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
- state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
- state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
- state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
- state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
- state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
+ for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+ auto& blend = state.blend[i];
+ blend.enabled = regs.blend.enable[i] != 0;
+ if (!blend.enabled)
+ continue;
+ blend.separate_alpha = regs.independent_blend[i].separate_alpha;
+ blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_rgb);
+ blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_rgb);
+ blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_rgb);
+ if (blend.separate_alpha) {
+ blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[i].equation_a);
+ blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_source_a);
+ blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[i].factor_dest_a);
+ }
+ }
}
void RasterizerOpenGL::SyncLogicOpState() {
@@ -1009,19 +1090,19 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
+ // TODO: what is the correct behavior here, a single scissor for all targets
+ // or scissor disabled for the rest of the targets?
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-
state.scissor.enabled = (regs.scissor_test.enable != 0);
- // TODO(Blinkhawk): Figure if the hardware supports scissor testing per viewport and how it's
- // implemented.
- if (regs.scissor_test.enable != 0) {
- const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
- const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
- state.scissor.x = regs.scissor_test.min_x;
- state.scissor.y = regs.scissor_test.min_y;
- state.scissor.width = width;
- state.scissor.height = height;
+ if (regs.scissor_test.enable == 0) {
+ return;
}
+ const u32 width = regs.scissor_test.max_x - regs.scissor_test.min_x;
+ const u32 height = regs.scissor_test.max_y - regs.scissor_test.min_y;
+ state.scissor.x = regs.scissor_test.min_x;
+ state.scissor.y = regs.scissor_test.min_y;
+ state.scissor.width = width;
+ state.scissor.height = height;
}
void RasterizerOpenGL::SyncTransformFeedback() {
@@ -1046,9 +1127,8 @@ void RasterizerOpenGL::CheckAlphaTests() {
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) {
- LOG_CRITICAL(
- Render_OpenGL,
- "Alpha Testing is enabled with Multiple Render Targets, this behavior is undefined.");
+ LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, "
+ "this behavior is undefined.");
UNREACHABLE();
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 0e90a31f5..8ef0f6c12 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -8,12 +8,12 @@
#include <cstddef>
#include <map>
#include <memory>
+#include <optional>
#include <tuple>
#include <utility>
#include <vector>
#include <boost/icl/interval_map.hpp>
-#include <boost/optional.hpp>
#include <boost/range/iterator_range.hpp>
#include <glad/glad.h>
@@ -88,11 +88,12 @@ private:
/// SamplerInfo struct.
void Create();
/// Syncs the sampler object with the config, updating any necessary state.
- void SyncWithConfig(const Tegra::Texture::TSCEntry& config);
+ void SyncWithConfig(const Tegra::Texture::FullTextureInfo& info);
private:
Tegra::Texture::TextureFilter mag_filter;
Tegra::Texture::TextureFilter min_filter;
+ Tegra::Texture::TextureMipmapFilter mip_filter;
Tegra::Texture::WrapMode wrap_u;
Tegra::Texture::WrapMode wrap_v;
Tegra::Texture::WrapMode wrap_p;
@@ -108,9 +109,9 @@ private:
* @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
* @param single_color_target Specifies if a single color buffer target should be used.
*/
- void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
- bool preserve_contents = true,
- boost::optional<std::size_t> single_color_target = {});
+ void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
+ bool using_depth_fb = true, bool preserve_contents = true,
+ std::optional<std::size_t> single_color_target = {});
/*
* Configures the current constbuffers to use for the draw command.
@@ -132,8 +133,8 @@ private:
u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit);
- /// Syncs the viewport to match the guest state
- void SyncViewport();
+ /// Syncs the viewport and depth range to match the guest state
+ void SyncViewport(OpenGLState& current_state);
/// Syncs the clip enabled status to match the guest state
void SyncClipEnabled();
@@ -144,11 +145,8 @@ private:
/// Syncs the cull mode to match the guest state
void SyncCullMode();
- /// Syncs the depth scale to match the guest state
- void SyncDepthScale();
-
- /// Syncs the depth offset to match the guest state
- void SyncDepthOffset();
+ /// Syncs the primitve restart to match the guest state
+ void SyncPrimitiveRestart();
/// Syncs the depth test state to match the guest state
void SyncDepthTestState();
@@ -171,6 +169,9 @@ private:
/// Syncs the point state to match the guest state
void SyncPointState();
+ /// Syncs Color Mask
+ void SyncColorMask();
+
/// Check asserts for alpha testing.
void CheckAlphaTests();
@@ -206,7 +207,8 @@ private:
std::size_t CalculateIndexBufferSize() const;
- void SetupVertexArrays();
+ void SetupVertexFormat();
+ void SetupVertexBuffer();
DrawParameters SetupDraw();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 9c8925383..9ca82c06c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,16 +15,24 @@
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/utils.h"
+#include "video_core/surface.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
#include "video_core/utils.h"
namespace OpenGL {
-using SurfaceType = SurfaceParams::SurfaceType;
-using PixelFormat = SurfaceParams::PixelFormat;
-using ComponentType = SurfaceParams::ComponentType;
+using VideoCore::Surface::ComponentTypeFromDepthFormat;
+using VideoCore::Surface::ComponentTypeFromRenderTarget;
+using VideoCore::Surface::ComponentTypeFromTexture;
+using VideoCore::Surface::PixelFormatFromDepthFormat;
+using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
+using VideoCore::Surface::PixelFormatFromTextureFormat;
+using VideoCore::Surface::SurfaceTargetFromTextureType;
struct FormatTuple {
GLint internal_format;
@@ -34,34 +42,6 @@ struct FormatTuple {
bool compressed;
};
-static bool IsPixelFormatASTC(PixelFormat format) {
- switch (format) {
- case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::ASTC_2D_5X4:
- case PixelFormat::ASTC_2D_8X8:
- case PixelFormat::ASTC_2D_8X5:
- return true;
- default:
- return false;
- }
-}
-
-static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
- switch (format) {
- case PixelFormat::ASTC_2D_4X4:
- return {4, 4};
- case PixelFormat::ASTC_2D_5X4:
- return {5, 4};
- case PixelFormat::ASTC_2D_8X8:
- return {8, 8};
- case PixelFormat::ASTC_2D_8X5:
- return {8, 5};
- default:
- LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
- UNREACHABLE();
- }
-}
-
void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
@@ -78,6 +58,36 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
}
}
+std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only,
+ bool uncompressed) const {
+ const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
+ const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
+ const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
+ u32 m_depth = (layer_only ? 1U : depth);
+ u32 m_width = MipWidth(mip_level);
+ u32 m_height = MipHeight(mip_level);
+ m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x);
+ m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y);
+ m_depth = std::max(1U, m_depth >> mip_level);
+ u32 m_block_height = MipBlockHeight(mip_level);
+ u32 m_block_depth = MipBlockDepth(mip_level);
+ return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width,
+ m_height, m_depth, m_block_height, m_block_depth);
+}
+
+std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
+ bool uncompressed) const {
+ std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth;
+ std::size_t size = 0;
+ for (u32 i = 0; i < max_mip_level; i++) {
+ size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed);
+ }
+ if (!force_gl && is_tiled) {
+ size = Common::AlignUp(size, block_size_bytes);
+ }
+ return size;
+}
+
/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
SurfaceParams params{};
@@ -85,8 +95,9 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
- params.pixel_format =
- PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value());
+ params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
+ params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
+ params.srgb_conversion);
params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
@@ -117,6 +128,13 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.target = SurfaceTarget::Texture2D;
}
break;
+ case SurfaceTarget::TextureCubeArray:
+ params.depth = config.tic.Depth() * 6;
+ if (!entry.IsArray()) {
+ ASSERT(params.depth == 6);
+ params.target = SurfaceTarget::TextureCubemap;
+ }
+ break;
default:
LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
UNREACHABLE();
@@ -124,6 +142,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
break;
}
+ params.is_layered = SurfaceTargetIsLayered(params.target);
params.max_mip_level = config.tic.max_mip_level + 1;
params.rt = {};
@@ -142,6 +161,8 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.block_height = 1 << config.memory_layout.block_height;
params.block_depth = 1 << config.memory_layout.block_depth;
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+ params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
+ config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
@@ -149,7 +170,8 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
- params.max_mip_level = 0;
+ params.max_mip_level = 1;
+ params.is_layered = false;
// Render target specific parameters, not used for caching
params.rt.index = static_cast<u32>(index);
@@ -176,12 +198,14 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.pixel_format = PixelFormatFromDepthFormat(format);
params.component_type = ComponentTypeFromDepthFormat(format);
params.type = GetFormatType(params.pixel_format);
+ params.srgb_conversion = false;
params.width = zeta_width;
params.height = zeta_height;
params.unaligned_height = zeta_height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
- params.max_mip_level = 0;
+ params.max_mip_level = 1;
+ params.is_layered = false;
params.rt = {};
params.InitCacheParameters(zeta_address);
@@ -198,6 +222,8 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
+ params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
+ config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
params.component_type = ComponentTypeFromRenderTarget(config.format);
params.type = GetFormatType(params.pixel_format);
params.width = config.width;
@@ -205,7 +231,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
params.unaligned_height = config.height;
params.target = SurfaceTarget::Texture2D;
params.depth = 1;
- params.max_mip_level = 0;
+ params.max_mip_level = 1;
params.rt = {};
params.InitCacheParameters(config.Address());
@@ -213,7 +239,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
return params;
}
-static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
+static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
@@ -229,7 +255,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
- {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
true}, // DXT23
@@ -263,14 +289,33 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
- {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
+ false}, // RGBA8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
+ {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
+ {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
+ // Compressed sRGB formats
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // DXT1_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // DXT23_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
+ true}, // DXT45_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8,
+ ComponentType::UNorm, true}, // BC7U_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
// Depth formats
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
@@ -286,20 +331,22 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
ComponentType::Float, false}, // Z32FS8
}};
-static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
+static GLenum SurfaceTargetToGL(SurfaceTarget target) {
switch (target) {
- case SurfaceParams::SurfaceTarget::Texture1D:
+ case SurfaceTarget::Texture1D:
return GL_TEXTURE_1D;
- case SurfaceParams::SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture2D:
return GL_TEXTURE_2D;
- case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture3D:
return GL_TEXTURE_3D;
- case SurfaceParams::SurfaceTarget::Texture1DArray:
+ case SurfaceTarget::Texture1DArray:
return GL_TEXTURE_1D_ARRAY;
- case SurfaceParams::SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::Texture2DArray:
return GL_TEXTURE_2D_ARRAY;
- case SurfaceParams::SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubemap:
return GL_TEXTURE_CUBE_MAP;
+ case SurfaceTarget::TextureCubeArray:
+ return GL_TEXTURE_CUBE_MAP_ARRAY_ARB;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
UNREACHABLE();
@@ -314,57 +361,41 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType
return format;
}
-MathUtil::Rectangle<u32> SurfaceParams::GetRect() const {
- u32 actual_height{unaligned_height};
+MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
+ u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
if (IsPixelFormatASTC(pixel_format)) {
// ASTC formats must stop at the ATSC block size boundary
actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second);
}
- return {0, actual_height, width, 0};
-}
-
-/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
-static bool IsFormatBCn(PixelFormat format) {
- switch (format) {
- case PixelFormat::DXT1:
- case PixelFormat::DXT23:
- case PixelFormat::DXT45:
- case PixelFormat::DXN1:
- case PixelFormat::DXN2SNORM:
- case PixelFormat::DXN2UNORM:
- case PixelFormat::BC7U:
- case PixelFormat::BC6H_UF16:
- case PixelFormat::BC6H_SF16:
- return true;
- }
- return false;
+ return {0, actual_height, MipWidth(mip_level), 0};
}
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
std::size_t gl_buffer_size, VAddr addr) {
- constexpr u32 bytes_per_pixel = SurfaceParams::GetBytesPerPixel(format);
+ constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
- const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
+ const u32 tile_size_x{GetDefaultBlockWidth(format)};
+ const u32 tile_size_y{GetDefaultBlockHeight(format)};
if (morton_to_gl) {
- const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
- addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
- const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
- memcpy(gl_buffer, data.data(), size_to_copy);
+ Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
+ stride, height, depth, block_height, block_depth);
} else {
- Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth,
+ Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
+ (height + tile_size_y - 1) / tile_size_y, depth,
bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
gl_buffer, false, block_height, block_depth);
}
}
-static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
- SurfaceParams::MaxPixelFormat>
- morton_to_gl_fns = {
- // clang-format off
+using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
+ VideoCore::Surface::MaxPixelFormat>;
+
+static constexpr GLConversionArray morton_to_gl_fns = {
+ // clang-format off
MortonCopy<true, PixelFormat::ABGR8U>,
MortonCopy<true, PixelFormat::ABGR8S>,
MortonCopy<true, PixelFormat::ABGR8UI>,
@@ -405,7 +436,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
MortonCopy<true, PixelFormat::RG16I>,
MortonCopy<true, PixelFormat::RG16S>,
MortonCopy<true, PixelFormat::RGB32F>,
- MortonCopy<true, PixelFormat::SRGBA8>,
+ MortonCopy<true, PixelFormat::RGBA8_SRGB>,
MortonCopy<true, PixelFormat::RG8U>,
MortonCopy<true, PixelFormat::RG8S>,
MortonCopy<true, PixelFormat::RG32UI>,
@@ -413,18 +444,29 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
+ MortonCopy<true, PixelFormat::BGRA8_SRGB>,
+ MortonCopy<true, PixelFormat::DXT1_SRGB>,
+ MortonCopy<true, PixelFormat::DXT23_SRGB>,
+ MortonCopy<true, PixelFormat::DXT45_SRGB>,
+ MortonCopy<true, PixelFormat::BC7U_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
+ MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
+ MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
MortonCopy<true, PixelFormat::Z32F>,
MortonCopy<true, PixelFormat::Z16>,
MortonCopy<true, PixelFormat::Z24S8>,
MortonCopy<true, PixelFormat::S8Z24>,
MortonCopy<true, PixelFormat::Z32FS8>,
- // clang-format on
+ // clang-format on
};
-static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
- SurfaceParams::MaxPixelFormat>
- gl_to_morton_fns = {
- // clang-format off
+static constexpr GLConversionArray gl_to_morton_fns = {
+ // clang-format off
MortonCopy<false, PixelFormat::ABGR8U>,
MortonCopy<false, PixelFormat::ABGR8S>,
MortonCopy<false, PixelFormat::ABGR8UI>,
@@ -466,7 +508,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
MortonCopy<false, PixelFormat::RG16I>,
MortonCopy<false, PixelFormat::RG16S>,
MortonCopy<false, PixelFormat::RGB32F>,
- MortonCopy<false, PixelFormat::SRGBA8>,
+ MortonCopy<false, PixelFormat::RGBA8_SRGB>,
MortonCopy<false, PixelFormat::RG8U>,
MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::RG32UI>,
@@ -474,17 +516,61 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t,
nullptr,
nullptr,
nullptr,
+ MortonCopy<false, PixelFormat::BGRA8_SRGB>,
+ MortonCopy<false, PixelFormat::DXT1_SRGB>,
+ MortonCopy<false, PixelFormat::DXT23_SRGB>,
+ MortonCopy<false, PixelFormat::DXT45_SRGB>,
+ MortonCopy<false, PixelFormat::BC7U_SRGB>,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
MortonCopy<false, PixelFormat::Z32F>,
MortonCopy<false, PixelFormat::Z16>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32FS8>,
- // clang-format on
+ // clang-format on
};
+void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
+ std::vector<u8>& gl_buffer, u32 mip_level) {
+ u32 depth = params.MipDepth(mip_level);
+ if (params.target == SurfaceTarget::Texture2D) {
+ // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
+ depth = 1U;
+ }
+ if (params.is_layered) {
+ u64 offset = params.GetMipmapLevelOffset(mip_level);
+ u64 offset_gl = 0;
+ const u64 layer_size = params.LayerMemorySize();
+ const u64 gl_size = params.LayerSizeGL(mip_level);
+ for (u32 i = 0; i < params.depth; i++) {
+ functions[static_cast<std::size_t>(params.pixel_format)](
+ params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
+ params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1,
+ gl_buffer.data() + offset_gl, gl_size, params.addr + offset);
+ offset += layer_size;
+ offset_gl += gl_size;
+ }
+ } else {
+ const u64 offset = params.GetMipmapLevelOffset(mip_level);
+ functions[static_cast<std::size_t>(params.pixel_format)](
+ params.MipWidth(mip_level), params.MipBlockHeight(mip_level),
+ params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(),
+ gl_buffer.size(), params.addr + offset);
+ }
+}
+
+MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+ MICROPROFILE_SCOPE(OpenGL_BlitSurface);
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
@@ -495,19 +581,21 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
OpenGLState state;
state.draw.read_framebuffer = read_fb_handle;
state.draw.draw_framebuffer = draw_fb_handle;
- state.Apply();
+ // Set sRGB enabled if the destination surfaces need it
+ state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
+ state.ApplyFramebufferState();
u32 buffers{};
if (src_params.type == SurfaceType::ColorTexture) {
switch (src_params.target) {
- case SurfaceParams::SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
GL_TEXTURE_2D, src_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
- case SurfaceParams::SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
@@ -516,12 +604,12 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
- case SurfaceParams::SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
src_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
- case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
SurfaceTargetToGL(src_params.target),
src_surface->Texture().handle, 0, 0);
@@ -537,13 +625,13 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
}
switch (dst_params.target) {
- case SurfaceParams::SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture2D:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0, 0);
break;
- case SurfaceParams::SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubemap:
glFramebufferTexture2D(
GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
@@ -552,13 +640,13 @@ static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
break;
- case SurfaceParams::SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::Texture2DArray:
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
dst_surface->Texture().handle, 0, 0);
glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
break;
- case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture3D:
glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
SurfaceTargetToGL(dst_params.target),
dst_surface->Texture().handle, 0, 0);
@@ -622,18 +710,20 @@ static void FastCopySurface(const Surface& src_surface, const Surface& dst_surfa
0, 0, width, height, 1);
}
+MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
- GLuint copy_pbo_handle, GLenum src_attachment = 0,
- GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
+ const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
+ const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0) {
+ MICROPROFILE_SCOPE(OpenGL_CopySurface);
ASSERT_MSG(dst_attachment == 0, "Unimplemented");
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
- auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
+ const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
+ const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
- std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
+ const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
@@ -657,13 +747,10 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
"reinterpretation but the texture is tiled.");
}
- std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
- std::vector<u8> data(remaining_size);
- std::memcpy(data.data(), Memory::GetPointer(dst_params.addr + src_params.size_in_bytes),
- data.size());
+ const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
- data.data());
+ Memory::GetPointer(dst_params.addr + src_params.size_in_bytes));
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -679,21 +766,22 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
UNREACHABLE();
} else {
switch (dst_params.target) {
- case SurfaceParams::SurfaceTarget::Texture1D:
+ case SurfaceTarget::Texture1D:
glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
dest_format.type, nullptr);
break;
- case SurfaceParams::SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture2D:
glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
dest_format.format, dest_format.type, nullptr);
break;
- case SurfaceParams::SurfaceTarget::Texture3D:
- case SurfaceParams::SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::TextureCubeArray:
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
static_cast<GLsizei>(dst_params.depth), dest_format.format,
dest_format.type, nullptr);
break;
- case SurfaceParams::SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubemap:
glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
static_cast<GLint>(cubemap_face), width, height, 1,
dest_format.format, dest_format.type, nullptr);
@@ -730,35 +818,43 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
if (!format_tuple.compressed) {
// Only pre-create the texture for non-compressed textures.
switch (params.target) {
- case SurfaceParams::SurfaceTarget::Texture1D:
- glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
- rect.GetWidth());
+ case SurfaceTarget::Texture1D:
+ glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level,
+ format_tuple.internal_format, rect.GetWidth());
break;
- case SurfaceParams::SurfaceTarget::Texture2D:
- case SurfaceParams::SurfaceTarget::TextureCubemap:
- glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
- rect.GetWidth(), rect.GetHeight());
+ case SurfaceTarget::Texture2D:
+ case SurfaceTarget::TextureCubemap:
+ glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level,
+ format_tuple.internal_format, rect.GetWidth(), rect.GetHeight());
break;
- case SurfaceParams::SurfaceTarget::Texture3D:
- case SurfaceParams::SurfaceTarget::Texture2DArray:
- glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
- rect.GetWidth(), rect.GetHeight(), params.depth);
+ case SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::TextureCubeArray:
+ glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level,
+ format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(),
+ params.depth);
break;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
- glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
- rect.GetHeight());
+ glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
+ rect.GetWidth(), rect.GetHeight());
}
}
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL,
+ params.max_mip_level - 1);
+ if (params.max_mip_level == 1) {
+ glTexParameterf(SurfaceTargetToGL(params.target), GL_TEXTURE_LOD_BIAS, 1000.0);
+ }
- VideoCore::LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
- SurfaceParams::SurfaceTargetName(params.target));
+ LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
+ SurfaceParams::SurfaceTargetName(params.target));
// Clamp size to mapped GPU memory region
// TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -788,7 +884,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo
S8Z24 s8z24_pixel{};
Z24S8 z24s8_pixel{};
- constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::S8Z24)};
+ constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
@@ -808,7 +904,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo
}
static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
- constexpr auto bpp{SurfaceParams::GetBytesPerPixel(PixelFormat::G8R8U)};
+ constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)};
for (std::size_t y = 0; y < height; ++y) {
for (std::size_t x = 0; x < width; ++x) {
const std::size_t offset{bpp * (y * width + x)};
@@ -825,17 +921,26 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
* typical desktop GPUs.
*/
static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
- u32 width, u32 height) {
+ u32 width, u32 height, u32 depth) {
switch (pixel_format) {
case PixelFormat::ASTC_2D_4X4:
case PixelFormat::ASTC_2D_8X8:
case PixelFormat::ASTC_2D_8X5:
- case PixelFormat::ASTC_2D_5X4: {
+ case PixelFormat::ASTC_2D_5X4:
+ case PixelFormat::ASTC_2D_5X5:
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_SRGB: {
// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
u32 block_width{};
u32 block_height{};
std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
- data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
+ data =
+ Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
break;
}
case PixelFormat::S8Z24:
@@ -862,7 +967,13 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
case PixelFormat::G8R8U:
case PixelFormat::G8R8S:
case PixelFormat::ASTC_2D_4X4:
- case PixelFormat::ASTC_2D_8X8: {
+ case PixelFormat::ASTC_2D_8X8:
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_5X5:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_SRGB: {
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
static_cast<u32>(pixel_format));
UNREACHABLE();
@@ -875,34 +986,25 @@ static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelForm
}
}
-MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
+MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
void CachedSurface::LoadGLBuffer() {
MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
-
- gl_buffer.resize(params.size_in_bytes_gl);
+ gl_buffer.resize(params.max_mip_level);
+ for (u32 i = 0; i < params.max_mip_level; i++)
+ gl_buffer[i].resize(params.GetMipmapSizeGL(i));
if (params.is_tiled) {
- u32 depth = params.depth;
- u32 block_depth = params.block_depth;
-
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
-
- if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
- // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
- depth = 1U;
- block_depth = 1U;
- }
-
- morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
- gl_buffer.size(), params.addr);
+ for (u32 i = 0; i < params.max_mip_level; i++)
+ SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i);
} else {
const auto texture_src_data{Memory::GetPointer(params.addr)};
const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
- gl_buffer.assign(texture_src_data, texture_src_data_end);
+ gl_buffer[0].assign(texture_src_data, texture_src_data_end);
}
-
- ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height);
+ for (u32 i = 0; i < params.max_mip_level; i++)
+ ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
+ params.MipHeight(i), params.MipDepth(i));
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
@@ -912,57 +1014,44 @@ void CachedSurface::FlushGLBuffer() {
ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
// OpenGL temporary buffer needs to be big enough to store raw texture size
- gl_buffer.resize(GetSizeInBytes());
+ gl_buffer.resize(1);
+ gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
- glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(),
- gl_buffer.data());
+ glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
+ static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
- ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
+ ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
params.height);
ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
- u32 depth = params.depth;
- u32 block_depth = params.block_depth;
-
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
params.block_width, static_cast<u32>(params.target));
- if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
- // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
- depth = 1U;
- }
- gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
- params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
- gl_buffer.size(), GetAddr());
+ SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0);
} else {
- std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer.data(), GetSizeInBytes());
+ std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
}
}
-MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
-void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
- if (params.type == SurfaceType::Fill)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_TextureUL);
-
- const auto& rect{params.GetRect()};
+void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
+ GLuint draw_fb_handle) {
+ const auto& rect{params.GetRect(mip_map)};
// Load data from memory to the surface
const GLint x0 = static_cast<GLint>(rect.left);
const GLint y0 = static_cast<GLint>(rect.bottom);
std::size_t buffer_offset =
- static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width +
+ static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
static_cast<std::size_t>(x0)) *
- SurfaceParams::GetBytesPerPixel(params.pixel_format);
+ GetBytesPerPixel(params.pixel_format);
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
const GLuint target_tex = texture.handle;
@@ -978,89 +1067,120 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
cur_state.Apply();
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.width * SurfaceParams::GetBytesPerPixel(params.pixel_format) % 4 == 0);
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width));
+ ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
+ GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
glActiveTexture(GL_TEXTURE0);
if (tuple.compressed) {
switch (params.target) {
- case SurfaceParams::SurfaceTarget::Texture2D:
- glCompressedTexImage2D(
- SurfaceTargetToGL(params.target), 0, tuple.internal_format,
- static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
- static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
+ case SurfaceTarget::Texture2D:
+ glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
+ static_cast<GLsizei>(params.MipWidth(mip_map)),
+ static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size,
+ &gl_buffer[mip_map][buffer_offset]);
+ break;
+ case SurfaceTarget::Texture3D:
+ glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
+ static_cast<GLsizei>(params.MipWidth(mip_map)),
+ static_cast<GLsizei>(params.MipHeight(mip_map)),
+ static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size,
+ &gl_buffer[mip_map][buffer_offset]);
break;
- case SurfaceParams::SurfaceTarget::Texture3D:
- case SurfaceParams::SurfaceTarget::Texture2DArray:
- glCompressedTexImage3D(
- SurfaceTargetToGL(params.target), 0, tuple.internal_format,
- static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
- static_cast<GLsizei>(params.depth), 0,
- static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
+ case SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::TextureCubeArray:
+ glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format,
+ static_cast<GLsizei>(params.MipWidth(mip_map)),
+ static_cast<GLsizei>(params.MipHeight(mip_map)),
+ static_cast<GLsizei>(params.depth), 0, image_size,
+ &gl_buffer[mip_map][buffer_offset]);
break;
- case SurfaceParams::SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubemap: {
+ GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
for (std::size_t face = 0; face < params.depth; ++face) {
glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face),
- 0, tuple.internal_format, static_cast<GLsizei>(params.width),
- static_cast<GLsizei>(params.height), 0,
- static_cast<GLsizei>(params.SizeInBytesCubeFaceGL()),
- &gl_buffer[buffer_offset]);
- buffer_offset += params.SizeInBytesCubeFace();
+ mip_map, tuple.internal_format,
+ static_cast<GLsizei>(params.MipWidth(mip_map)),
+ static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
+ layer_size, &gl_buffer[mip_map][buffer_offset]);
+ buffer_offset += layer_size;
}
break;
+ }
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
- glCompressedTexImage2D(
- GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
- static_cast<GLsizei>(params.height), 0,
- static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[buffer_offset]);
+ glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format,
+ static_cast<GLsizei>(params.MipWidth(mip_map)),
+ static_cast<GLsizei>(params.MipHeight(mip_map)), 0,
+ static_cast<GLsizei>(params.size_in_bytes_gl),
+ &gl_buffer[mip_map][buffer_offset]);
}
} else {
switch (params.target) {
- case SurfaceParams::SurfaceTarget::Texture1D:
- glTexSubImage1D(SurfaceTargetToGL(params.target), 0, x0,
+ case SurfaceTarget::Texture1D:
+ glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0,
static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
- &gl_buffer[buffer_offset]);
+ &gl_buffer[mip_map][buffer_offset]);
break;
- case SurfaceParams::SurfaceTarget::Texture2D:
- glTexSubImage2D(SurfaceTargetToGL(params.target), 0, x0, y0,
+ case SurfaceTarget::Texture2D:
+ glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
- &gl_buffer[buffer_offset]);
+ &gl_buffer[mip_map][buffer_offset]);
break;
- case SurfaceParams::SurfaceTarget::Texture3D:
- case SurfaceParams::SurfaceTarget::Texture2DArray:
- glTexSubImage3D(SurfaceTargetToGL(params.target), 0, x0, y0, 0,
+ case SurfaceTarget::Texture3D:
+ glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
+ static_cast<GLsizei>(rect.GetWidth()),
+ static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
+ tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
+ break;
+ case SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::TextureCubeArray:
+ glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0,
static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
- tuple.type, &gl_buffer[buffer_offset]);
+ tuple.type, &gl_buffer[mip_map][buffer_offset]);
break;
- case SurfaceParams::SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubemap: {
+ std::size_t start = buffer_offset;
for (std::size_t face = 0; face < params.depth; ++face) {
- glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 0, x0,
- y0, static_cast<GLsizei>(rect.GetWidth()),
+ glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map,
+ x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
- &gl_buffer[buffer_offset]);
- buffer_offset += params.SizeInBytesCubeFace();
+ &gl_buffer[mip_map][buffer_offset]);
+ buffer_offset += params.LayerSizeGL(mip_map);
}
break;
+ }
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(params.target));
UNREACHABLE();
- glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+ glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
- &gl_buffer[buffer_offset]);
+ &gl_buffer[mip_map][buffer_offset]);
}
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
-RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
+MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
+void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
+ if (params.type == SurfaceType::Fill)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_TextureUL);
+
+ for (u32 i = 0; i < params.max_mip_level; i++)
+ UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
+}
+
+RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
+ : RasterizerCache{rasterizer} {
read_framebuffer.Create();
draw_framebuffer.Create();
copy_pbo.Create();
@@ -1179,7 +1299,7 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- FlushRegion(src_params.addr, dst_params.size_in_bytes);
+ FlushRegion(src_params.addr, dst_params.MemorySize());
LoadSurface(dst_surface);
}
@@ -1200,8 +1320,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
// For compatible surfaces, we can just do fast glCopyImageSubData based copy
if (old_params.target == new_params.target && old_params.type == new_params.type &&
old_params.depth == new_params.depth && old_params.depth == 1 &&
- SurfaceParams::GetFormatBpp(old_params.pixel_format) ==
- SurfaceParams::GetFormatBpp(new_params.pixel_format)) {
+ GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format)) {
FastCopySurface(old_surface, new_surface);
return new_surface;
}
@@ -1214,51 +1333,19 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
const bool is_blit{old_params.pixel_format == new_params.pixel_format};
switch (new_params.target) {
- case SurfaceParams::SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture2D:
if (is_blit) {
BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
} else {
CopySurface(old_surface, new_surface, copy_pbo.handle);
}
break;
- case SurfaceParams::SurfaceTarget::Texture3D:
+ case SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::Texture3D:
+ case SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::TextureCubeArray:
AccurateCopySurface(old_surface, new_surface);
break;
- case SurfaceParams::SurfaceTarget::TextureCubemap: {
- if (old_params.rt.array_mode != 1) {
- // TODO(bunnei): This is used by Breath of the Wild, I'm not sure how to implement this
- // yet (array rendering used as a cubemap texture).
- LOG_CRITICAL(HW_GPU, "Unhandled rendertarget array_mode {}", old_params.rt.array_mode);
- UNREACHABLE();
- return new_surface;
- }
-
- // This seems to be used for render-to-cubemap texture
- ASSERT_MSG(old_params.target == SurfaceParams::SurfaceTarget::Texture2D, "Unexpected");
- ASSERT_MSG(old_params.pixel_format == new_params.pixel_format, "Unexpected");
- ASSERT_MSG(old_params.rt.base_layer == 0, "Unimplemented");
-
- // TODO(bunnei): Verify the below - this stride seems to be in 32-bit words, not pixels.
- // Tested with Splatoon 2, Super Mario Odyssey, and Breath of the Wild.
- const std::size_t byte_stride{old_params.rt.layer_stride * sizeof(u32)};
-
- for (std::size_t index = 0; index < new_params.depth; ++index) {
- Surface face_surface{TryGetReservedSurface(old_params)};
- ASSERT_MSG(face_surface, "Unexpected");
-
- if (is_blit) {
- BlitSurface(face_surface, new_surface, read_framebuffer.handle,
- draw_framebuffer.handle, face_surface->GetSurfaceParams().rt.index,
- new_params.rt.index, index);
- } else {
- CopySurface(face_surface, new_surface, copy_pbo.handle,
- face_surface->GetSurfaceParams().rt.index, new_params.rt.index, index);
- }
-
- old_params.addr += byte_stride;
- }
- break;
- }
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
static_cast<u32>(new_params.target));
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0dd0d90a3..494f6b903 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -7,6 +7,7 @@
#include <array>
#include <map>
#include <memory>
+#include <string>
#include <vector>
#include "common/alignment.h"
@@ -18,6 +19,7 @@
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
#include "video_core/textures/texture.h"
@@ -27,126 +29,12 @@ class CachedSurface;
using Surface = std::shared_ptr<CachedSurface>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
-struct SurfaceParams {
- enum class PixelFormat {
- ABGR8U = 0,
- ABGR8S = 1,
- ABGR8UI = 2,
- B5G6R5U = 3,
- A2B10G10R10U = 4,
- A1B5G5R5U = 5,
- R8U = 6,
- R8UI = 7,
- RGBA16F = 8,
- RGBA16U = 9,
- RGBA16UI = 10,
- R11FG11FB10F = 11,
- RGBA32UI = 12,
- DXT1 = 13,
- DXT23 = 14,
- DXT45 = 15,
- DXN1 = 16, // This is also known as BC4
- DXN2UNORM = 17,
- DXN2SNORM = 18,
- BC7U = 19,
- BC6H_UF16 = 20,
- BC6H_SF16 = 21,
- ASTC_2D_4X4 = 22,
- G8R8U = 23,
- G8R8S = 24,
- BGRA8 = 25,
- RGBA32F = 26,
- RG32F = 27,
- R32F = 28,
- R16F = 29,
- R16U = 30,
- R16S = 31,
- R16UI = 32,
- R16I = 33,
- RG16 = 34,
- RG16F = 35,
- RG16UI = 36,
- RG16I = 37,
- RG16S = 38,
- RGB32F = 39,
- SRGBA8 = 40,
- RG8U = 41,
- RG8S = 42,
- RG32UI = 43,
- R32UI = 44,
- ASTC_2D_8X8 = 45,
- ASTC_2D_8X5 = 46,
- ASTC_2D_5X4 = 47,
-
- MaxColorFormat,
-
- // Depth formats
- Z32F = 48,
- Z16 = 49,
-
- MaxDepthFormat,
-
- // DepthStencil formats
- Z24S8 = 50,
- S8Z24 = 51,
- Z32FS8 = 52,
-
- MaxDepthStencilFormat,
-
- Max = MaxDepthStencilFormat,
- Invalid = 255,
- };
-
- static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
-
- enum class ComponentType {
- Invalid = 0,
- SNorm = 1,
- UNorm = 2,
- SInt = 3,
- UInt = 4,
- Float = 5,
- };
-
- enum class SurfaceType {
- ColorTexture = 0,
- Depth = 1,
- DepthStencil = 2,
- Fill = 3,
- Invalid = 4,
- };
-
- enum class SurfaceTarget {
- Texture1D,
- Texture2D,
- Texture3D,
- Texture1DArray,
- Texture2DArray,
- TextureCubemap,
- };
-
- static SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) {
- switch (texture_type) {
- case Tegra::Texture::TextureType::Texture1D:
- return SurfaceTarget::Texture1D;
- case Tegra::Texture::TextureType::Texture2D:
- case Tegra::Texture::TextureType::Texture2DNoMipmap:
- return SurfaceTarget::Texture2D;
- case Tegra::Texture::TextureType::Texture3D:
- return SurfaceTarget::Texture3D;
- case Tegra::Texture::TextureType::TextureCubemap:
- return SurfaceTarget::TextureCubemap;
- case Tegra::Texture::TextureType::Texture1DArray:
- return SurfaceTarget::Texture1DArray;
- case Tegra::Texture::TextureType::Texture2DArray:
- return SurfaceTarget::Texture2DArray;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
- UNREACHABLE();
- return SurfaceTarget::Texture2D;
- }
- }
+using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
+using SurfaceType = VideoCore::Surface::SurfaceType;
+using PixelFormat = VideoCore::Surface::PixelFormat;
+using ComponentType = VideoCore::Surface::ComponentType;
+struct SurfaceParams {
static std::string SurfaceTargetName(SurfaceTarget target) {
switch (target) {
case SurfaceTarget::Texture1D:
@@ -161,6 +49,8 @@ struct SurfaceParams {
return "Texture2DArray";
case SurfaceTarget::TextureCubemap:
return "TextureCubemap";
+ case SurfaceTarget::TextureCubeArray:
+ return "TextureCubeArray";
default:
LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
UNREACHABLE();
@@ -168,552 +58,12 @@ struct SurfaceParams {
}
}
- /**
- * Gets the compression factor for the specified PixelFormat. This applies to just the
- * "compressed width" and "compressed height", not the overall compression factor of a
- * compressed image. This is used for maintaining proper surface sizes for compressed
- * texture formats.
- */
- static constexpr u32 GetCompressionFactor(PixelFormat format) {
- if (format == PixelFormat::Invalid)
- return 0;
-
- constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
- 1, // ABGR8U
- 1, // ABGR8S
- 1, // ABGR8UI
- 1, // B5G6R5U
- 1, // A2B10G10R10U
- 1, // A1B5G5R5U
- 1, // R8U
- 1, // R8UI
- 1, // RGBA16F
- 1, // RGBA16U
- 1, // RGBA16UI
- 1, // R11FG11FB10F
- 1, // RGBA32UI
- 4, // DXT1
- 4, // DXT23
- 4, // DXT45
- 4, // DXN1
- 4, // DXN2UNORM
- 4, // DXN2SNORM
- 4, // BC7U
- 4, // BC6H_UF16
- 4, // BC6H_SF16
- 4, // ASTC_2D_4X4
- 1, // G8R8U
- 1, // G8R8S
- 1, // BGRA8
- 1, // RGBA32F
- 1, // RG32F
- 1, // R32F
- 1, // R16F
- 1, // R16U
- 1, // R16S
- 1, // R16UI
- 1, // R16I
- 1, // RG16
- 1, // RG16F
- 1, // RG16UI
- 1, // RG16I
- 1, // RG16S
- 1, // RGB32F
- 1, // SRGBA8
- 1, // RG8U
- 1, // RG8S
- 1, // RG32UI
- 1, // R32UI
- 4, // ASTC_2D_8X8
- 4, // ASTC_2D_8X5
- 4, // ASTC_2D_5X4
- 1, // Z32F
- 1, // Z16
- 1, // Z24S8
- 1, // S8Z24
- 1, // Z32FS8
- }};
-
- ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
- return compression_factor_table[static_cast<std::size_t>(format)];
- }
-
- static constexpr u32 GetFormatBpp(PixelFormat format) {
- if (format == PixelFormat::Invalid)
- return 0;
-
- constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
- 32, // ABGR8U
- 32, // ABGR8S
- 32, // ABGR8UI
- 16, // B5G6R5U
- 32, // A2B10G10R10U
- 16, // A1B5G5R5U
- 8, // R8U
- 8, // R8UI
- 64, // RGBA16F
- 64, // RGBA16U
- 64, // RGBA16UI
- 32, // R11FG11FB10F
- 128, // RGBA32UI
- 64, // DXT1
- 128, // DXT23
- 128, // DXT45
- 64, // DXN1
- 128, // DXN2UNORM
- 128, // DXN2SNORM
- 128, // BC7U
- 128, // BC6H_UF16
- 128, // BC6H_SF16
- 32, // ASTC_2D_4X4
- 16, // G8R8U
- 16, // G8R8S
- 32, // BGRA8
- 128, // RGBA32F
- 64, // RG32F
- 32, // R32F
- 16, // R16F
- 16, // R16U
- 16, // R16S
- 16, // R16UI
- 16, // R16I
- 32, // RG16
- 32, // RG16F
- 32, // RG16UI
- 32, // RG16I
- 32, // RG16S
- 96, // RGB32F
- 32, // SRGBA8
- 16, // RG8U
- 16, // RG8S
- 64, // RG32UI
- 32, // R32UI
- 16, // ASTC_2D_8X8
- 32, // ASTC_2D_8X5
- 32, // ASTC_2D_5X4
- 32, // Z32F
- 16, // Z16
- 32, // Z24S8
- 32, // S8Z24
- 64, // Z32FS8
- }};
-
- ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
- return bpp_table[static_cast<std::size_t>(format)];
- }
-
u32 GetFormatBpp() const {
- return GetFormatBpp(pixel_format);
- }
-
- static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
- switch (format) {
- case Tegra::DepthFormat::S8_Z24_UNORM:
- return PixelFormat::S8Z24;
- case Tegra::DepthFormat::Z24_S8_UNORM:
- return PixelFormat::Z24S8;
- case Tegra::DepthFormat::Z32_FLOAT:
- return PixelFormat::Z32F;
- case Tegra::DepthFormat::Z16_UNORM:
- return PixelFormat::Z16;
- case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
- return PixelFormat::Z32FS8;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
- static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
- switch (format) {
- // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
- // gamma.
- case Tegra::RenderTargetFormat::RGBA8_SRGB:
- case Tegra::RenderTargetFormat::RGBA8_UNORM:
- return PixelFormat::ABGR8U;
- case Tegra::RenderTargetFormat::RGBA8_SNORM:
- return PixelFormat::ABGR8S;
- case Tegra::RenderTargetFormat::RGBA8_UINT:
- return PixelFormat::ABGR8UI;
- case Tegra::RenderTargetFormat::BGRA8_SRGB:
- case Tegra::RenderTargetFormat::BGRA8_UNORM:
- return PixelFormat::BGRA8;
- case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
- return PixelFormat::A2B10G10R10U;
- case Tegra::RenderTargetFormat::RGBA16_FLOAT:
- return PixelFormat::RGBA16F;
- case Tegra::RenderTargetFormat::RGBA16_UNORM:
- return PixelFormat::RGBA16U;
- case Tegra::RenderTargetFormat::RGBA16_UINT:
- return PixelFormat::RGBA16UI;
- case Tegra::RenderTargetFormat::RGBA32_FLOAT:
- return PixelFormat::RGBA32F;
- case Tegra::RenderTargetFormat::RG32_FLOAT:
- return PixelFormat::RG32F;
- case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
- return PixelFormat::R11FG11FB10F;
- case Tegra::RenderTargetFormat::B5G6R5_UNORM:
- return PixelFormat::B5G6R5U;
- case Tegra::RenderTargetFormat::BGR5A1_UNORM:
- return PixelFormat::A1B5G5R5U;
- case Tegra::RenderTargetFormat::RGBA32_UINT:
- return PixelFormat::RGBA32UI;
- case Tegra::RenderTargetFormat::R8_UNORM:
- return PixelFormat::R8U;
- case Tegra::RenderTargetFormat::R8_UINT:
- return PixelFormat::R8UI;
- case Tegra::RenderTargetFormat::RG16_FLOAT:
- return PixelFormat::RG16F;
- case Tegra::RenderTargetFormat::RG16_UINT:
- return PixelFormat::RG16UI;
- case Tegra::RenderTargetFormat::RG16_SINT:
- return PixelFormat::RG16I;
- case Tegra::RenderTargetFormat::RG16_UNORM:
- return PixelFormat::RG16;
- case Tegra::RenderTargetFormat::RG16_SNORM:
- return PixelFormat::RG16S;
- case Tegra::RenderTargetFormat::RG8_UNORM:
- return PixelFormat::RG8U;
- case Tegra::RenderTargetFormat::RG8_SNORM:
- return PixelFormat::RG8S;
- case Tegra::RenderTargetFormat::R16_FLOAT:
- return PixelFormat::R16F;
- case Tegra::RenderTargetFormat::R16_UNORM:
- return PixelFormat::R16U;
- case Tegra::RenderTargetFormat::R16_SNORM:
- return PixelFormat::R16S;
- case Tegra::RenderTargetFormat::R16_UINT:
- return PixelFormat::R16UI;
- case Tegra::RenderTargetFormat::R16_SINT:
- return PixelFormat::R16I;
- case Tegra::RenderTargetFormat::R32_FLOAT:
- return PixelFormat::R32F;
- case Tegra::RenderTargetFormat::R32_UINT:
- return PixelFormat::R32UI;
- case Tegra::RenderTargetFormat::RG32_UINT:
- return PixelFormat::RG32UI;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
- static PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
- Tegra::Texture::ComponentType component_type) {
- // TODO(Subv): Properly implement this
- switch (format) {
- case Tegra::Texture::TextureFormat::A8R8G8B8:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::ABGR8U;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::ABGR8S;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::ABGR8UI;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::B5G6R5:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::B5G6R5U;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::A2B10G10R10:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::A2B10G10R10U;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::A1B5G5R5:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::A1B5G5R5U;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::R8:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::R8U;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::R8UI;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::G8R8:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::G8R8U;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::G8R8S;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::RGBA16U;
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RGBA16F;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::BF10GF11RF11:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::R11FG11FB10F;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RGBA32F;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::RGBA32UI;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::R32_G32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RG32F;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::RG32UI;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::R32_G32_B32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RGB32F;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::R16:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::R16F;
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::R16U;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::R16S;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::R16UI;
- case Tegra::Texture::ComponentType::SINT:
- return PixelFormat::R16I;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::R32:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::R32F;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::R32UI;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::ZF32:
- return PixelFormat::Z32F;
- case Tegra::Texture::TextureFormat::Z16:
- return PixelFormat::Z16;
- case Tegra::Texture::TextureFormat::Z24S8:
- return PixelFormat::Z24S8;
- case Tegra::Texture::TextureFormat::DXT1:
- return PixelFormat::DXT1;
- case Tegra::Texture::TextureFormat::DXT23:
- return PixelFormat::DXT23;
- case Tegra::Texture::TextureFormat::DXT45:
- return PixelFormat::DXT45;
- case Tegra::Texture::TextureFormat::DXN1:
- return PixelFormat::DXN1;
- case Tegra::Texture::TextureFormat::DXN2:
- switch (component_type) {
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::DXN2UNORM;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::DXN2SNORM;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- case Tegra::Texture::TextureFormat::BC7U:
- return PixelFormat::BC7U;
- case Tegra::Texture::TextureFormat::BC6H_UF16:
- return PixelFormat::BC6H_UF16;
- case Tegra::Texture::TextureFormat::BC6H_SF16:
- return PixelFormat::BC6H_SF16;
- case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
- return PixelFormat::ASTC_2D_4X4;
- case Tegra::Texture::TextureFormat::ASTC_2D_5X4:
- return PixelFormat::ASTC_2D_5X4;
- case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
- return PixelFormat::ASTC_2D_8X8;
- case Tegra::Texture::TextureFormat::ASTC_2D_8X5:
- return PixelFormat::ASTC_2D_8X5;
- case Tegra::Texture::TextureFormat::R16_G16:
- switch (component_type) {
- case Tegra::Texture::ComponentType::FLOAT:
- return PixelFormat::RG16F;
- case Tegra::Texture::ComponentType::UNORM:
- return PixelFormat::RG16;
- case Tegra::Texture::ComponentType::SNORM:
- return PixelFormat::RG16S;
- case Tegra::Texture::ComponentType::UINT:
- return PixelFormat::RG16UI;
- case Tegra::Texture::ComponentType::SINT:
- return PixelFormat::RG16I;
- }
- LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
- static_cast<u32>(component_type));
- UNREACHABLE();
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}",
- static_cast<u32>(format), static_cast<u32>(component_type));
- UNREACHABLE();
- }
- }
-
- static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
- // TODO(Subv): Implement more component types
- switch (type) {
- case Tegra::Texture::ComponentType::UNORM:
- return ComponentType::UNorm;
- case Tegra::Texture::ComponentType::FLOAT:
- return ComponentType::Float;
- case Tegra::Texture::ComponentType::SNORM:
- return ComponentType::SNorm;
- case Tegra::Texture::ComponentType::UINT:
- return ComponentType::UInt;
- case Tegra::Texture::ComponentType::SINT:
- return ComponentType::SInt;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
- UNREACHABLE();
- }
- }
-
- static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
- // TODO(Subv): Implement more render targets
- switch (format) {
- case Tegra::RenderTargetFormat::RGBA8_UNORM:
- case Tegra::RenderTargetFormat::RGBA8_SRGB:
- case Tegra::RenderTargetFormat::BGRA8_UNORM:
- case Tegra::RenderTargetFormat::BGRA8_SRGB:
- case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
- case Tegra::RenderTargetFormat::R8_UNORM:
- case Tegra::RenderTargetFormat::RG16_UNORM:
- case Tegra::RenderTargetFormat::R16_UNORM:
- case Tegra::RenderTargetFormat::B5G6R5_UNORM:
- case Tegra::RenderTargetFormat::BGR5A1_UNORM:
- case Tegra::RenderTargetFormat::RG8_UNORM:
- case Tegra::RenderTargetFormat::RGBA16_UNORM:
- return ComponentType::UNorm;
- case Tegra::RenderTargetFormat::RGBA8_SNORM:
- case Tegra::RenderTargetFormat::RG16_SNORM:
- case Tegra::RenderTargetFormat::R16_SNORM:
- case Tegra::RenderTargetFormat::RG8_SNORM:
- return ComponentType::SNorm;
- case Tegra::RenderTargetFormat::RGBA16_FLOAT:
- case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
- case Tegra::RenderTargetFormat::RGBA32_FLOAT:
- case Tegra::RenderTargetFormat::RG32_FLOAT:
- case Tegra::RenderTargetFormat::RG16_FLOAT:
- case Tegra::RenderTargetFormat::R16_FLOAT:
- case Tegra::RenderTargetFormat::R32_FLOAT:
- return ComponentType::Float;
- case Tegra::RenderTargetFormat::RGBA32_UINT:
- case Tegra::RenderTargetFormat::RGBA16_UINT:
- case Tegra::RenderTargetFormat::RG16_UINT:
- case Tegra::RenderTargetFormat::R8_UINT:
- case Tegra::RenderTargetFormat::R16_UINT:
- case Tegra::RenderTargetFormat::RG32_UINT:
- case Tegra::RenderTargetFormat::R32_UINT:
- case Tegra::RenderTargetFormat::RGBA8_UINT:
- return ComponentType::UInt;
- case Tegra::RenderTargetFormat::RG16_SINT:
- case Tegra::RenderTargetFormat::R16_SINT:
- return ComponentType::SInt;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
- static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
- switch (format) {
- case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return PixelFormat::ABGR8U;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
- static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
- switch (format) {
- case Tegra::DepthFormat::Z16_UNORM:
- case Tegra::DepthFormat::S8_Z24_UNORM:
- case Tegra::DepthFormat::Z24_S8_UNORM:
- return ComponentType::UNorm;
- case Tegra::DepthFormat::Z32_FLOAT:
- case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
- return ComponentType::Float;
- default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
- }
- }
-
- static SurfaceType GetFormatType(PixelFormat pixel_format) {
- if (static_cast<std::size_t>(pixel_format) <
- static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
- return SurfaceType::ColorTexture;
- }
-
- if (static_cast<std::size_t>(pixel_format) <
- static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
- return SurfaceType::Depth;
- }
-
- if (static_cast<std::size_t>(pixel_format) <
- static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
- return SurfaceType::DepthStencil;
- }
-
- // TODO(Subv): Implement the other formats
- ASSERT(false);
-
- return SurfaceType::Invalid;
- }
-
- /// Returns the sizer in bytes of the specified pixel format
- static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
- if (pixel_format == SurfaceParams::PixelFormat::Invalid) {
- return 0;
- }
- return GetFormatBpp(pixel_format) / CHAR_BIT;
+ return VideoCore::Surface::GetFormatBpp(pixel_format);
}
/// Returns the rectangle corresponding to this surface
- MathUtil::Rectangle<u32> GetRect() const;
+ MathUtil::Rectangle<u32> GetRect(u32 mip_level = 0) const;
/// Returns the total size of this surface in bytes, adjusted for compression
std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
@@ -742,6 +92,91 @@ struct SurfaceParams {
return size_in_bytes_gl / 6;
}
+ /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
+ std::size_t MemorySize() const {
+ std::size_t size = InnerMemorySize(false, is_layered);
+ if (is_layered)
+ return size * depth;
+ return size;
+ }
+
+ /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
+ /// mipmaps.
+ std::size_t LayerMemorySize() const {
+ return InnerMemorySize(false, true);
+ }
+
+ /// Returns the size of a layer of this surface in OpenGL.
+ std::size_t LayerSizeGL(u32 mip_level) const {
+ return InnerMipmapMemorySize(mip_level, true, is_layered, false);
+ }
+
+ std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
+ std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
+ if (is_layered)
+ return size * depth;
+ return size;
+ }
+
+ std::size_t GetMipmapLevelOffset(u32 mip_level) const {
+ std::size_t offset = 0;
+ for (u32 i = 0; i < mip_level; i++)
+ offset += InnerMipmapMemorySize(i, false, is_layered);
+ return offset;
+ }
+
+ std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
+ std::size_t offset = 0;
+ for (u32 i = 0; i < mip_level; i++)
+ offset += InnerMipmapMemorySize(i, true, is_layered);
+ return offset;
+ }
+
+ u32 MipWidth(u32 mip_level) const {
+ return std::max(1U, width >> mip_level);
+ }
+
+ u32 MipHeight(u32 mip_level) const {
+ return std::max(1U, height >> mip_level);
+ }
+
+ u32 MipDepth(u32 mip_level) const {
+ return is_layered ? depth : std::max(1U, depth >> mip_level);
+ }
+
+ // Auto block resizing algorithm from:
+ // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+ u32 MipBlockHeight(u32 mip_level) const {
+ if (mip_level == 0)
+ return block_height;
+ u32 alt_height = MipHeight(mip_level);
+ u32 h = GetDefaultBlockHeight(pixel_format);
+ u32 blocks_in_y = (alt_height + h - 1) / h;
+ u32 bh = 16;
+ while (bh > 1 && blocks_in_y <= bh * 4) {
+ bh >>= 1;
+ }
+ return bh;
+ }
+
+ u32 MipBlockDepth(u32 mip_level) const {
+ if (mip_level == 0)
+ return block_depth;
+ if (is_layered)
+ return 1;
+ u32 depth = MipDepth(mip_level);
+ u32 bd = 32;
+ while (bd > 1 && depth * 2 <= bd) {
+ bd >>= 1;
+ }
+ if (bd == 32) {
+ u32 bh = MipBlockHeight(mip_level);
+ if (bh >= 4)
+ return 16;
+ }
+ return bd;
+ }
+
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
@@ -782,7 +217,8 @@ struct SurfaceParams {
u32 unaligned_height;
SurfaceTarget target;
u32 max_mip_level;
-
+ bool is_layered;
+ bool srgb_conversion;
// Parameters used for caching
VAddr addr;
Tegra::GPUVAddr gpu_addr;
@@ -797,6 +233,12 @@ struct SurfaceParams {
u32 layer_stride;
u32 base_layer;
} rt;
+
+private:
+ std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
+ bool uncompressed = false) const;
+ std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
+ bool uncompressed = false) const;
};
}; // namespace OpenGL
@@ -822,6 +264,8 @@ struct hash<SurfaceReserveKey> {
namespace OpenGL {
+class RasterizerOpenGL;
+
class CachedSurface final : public RasterizerCacheObject {
public:
CachedSurface(const SurfaceParams& params);
@@ -858,8 +302,10 @@ public:
void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
private:
+ void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
+
OGLTexture texture;
- std::vector<u8> gl_buffer;
+ std::vector<std::vector<u8>> gl_buffer;
SurfaceParams params;
GLenum gl_target;
std::size_t cached_size_in_bytes;
@@ -867,7 +313,7 @@ private:
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
public:
- RasterizerCacheOpenGL();
+ explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
new file mode 100644
index 000000000..c17d5ac00
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -0,0 +1,186 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+#include <glad/glad.h>
+#include "common/common_types.h"
+#include "common/microprofile.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_state.h"
+
+MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
+MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
+
+namespace OpenGL {
+
+void OGLTexture::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glGenTextures(1, &handle);
+}
+
+void OGLTexture::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteTextures(1, &handle);
+ OpenGLState::GetCurState().UnbindTexture(handle).Apply();
+ handle = 0;
+}
+
+void OGLSampler::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glGenSamplers(1, &handle);
+}
+
+void OGLSampler::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteSamplers(1, &handle);
+ OpenGLState::GetCurState().ResetSampler(handle).Apply();
+ handle = 0;
+}
+
+void OGLShader::Create(const char* source, GLenum type) {
+ if (handle != 0)
+ return;
+ if (source == nullptr)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ handle = GLShader::LoadShader(source, type);
+}
+
+void OGLShader::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteShader(handle);
+ handle = 0;
+}
+
+void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
+ const char* frag_shader, bool separable_program) {
+ OGLShader vert, geo, frag;
+ if (vert_shader)
+ vert.Create(vert_shader, GL_VERTEX_SHADER);
+ if (geo_shader)
+ geo.Create(geo_shader, GL_GEOMETRY_SHADER);
+ if (frag_shader)
+ frag.Create(frag_shader, GL_FRAGMENT_SHADER);
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ Create(separable_program, vert.handle, geo.handle, frag.handle);
+}
+
+void OGLProgram::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteProgram(handle);
+ OpenGLState::GetCurState().ResetProgram(handle).Apply();
+ handle = 0;
+}
+
+void OGLPipeline::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glGenProgramPipelines(1, &handle);
+}
+
+void OGLPipeline::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteProgramPipelines(1, &handle);
+ OpenGLState::GetCurState().ResetPipeline(handle).Apply();
+ handle = 0;
+}
+
+void OGLBuffer::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glGenBuffers(1, &handle);
+}
+
+void OGLBuffer::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteBuffers(1, &handle);
+ OpenGLState::GetCurState().ResetBuffer(handle).Apply();
+ handle = 0;
+}
+
+void OGLSync::Create() {
+ if (handle != 0)
+ return;
+
+ // Don't profile here, this one is expected to happen ingame.
+ handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+}
+
+void OGLSync::Release() {
+ if (handle == 0)
+ return;
+
+ // Don't profile here, this one is expected to happen ingame.
+ glDeleteSync(handle);
+ handle = 0;
+}
+
+void OGLVertexArray::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glGenVertexArrays(1, &handle);
+}
+
+void OGLVertexArray::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteVertexArrays(1, &handle);
+ OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
+ handle = 0;
+}
+
+void OGLFramebuffer::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glGenFramebuffers(1, &handle);
+}
+
+void OGLFramebuffer::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteFramebuffers(1, &handle);
+ OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
+ handle = 0;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 3bc1b83b5..e33f1e973 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -8,7 +8,6 @@
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
-#include "video_core/renderer_opengl/gl_state.h"
namespace OpenGL {
@@ -29,20 +28,10 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create() {
- if (handle != 0)
- return;
- glGenTextures(1, &handle);
- }
+ void Create();
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteTextures(1, &handle);
- OpenGLState::GetCurState().UnbindTexture(handle).Apply();
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
@@ -64,20 +53,10 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create() {
- if (handle != 0)
- return;
- glGenSamplers(1, &handle);
- }
+ void Create();
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteSamplers(1, &handle);
- OpenGLState::GetCurState().ResetSampler(handle).Apply();
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
@@ -98,20 +77,9 @@ public:
return *this;
}
- void Create(const char* source, GLenum type) {
- if (handle != 0)
- return;
- if (source == nullptr)
- return;
- handle = GLShader::LoadShader(source, type);
- }
+ void Create(const char* source, GLenum type);
- void Release() {
- if (handle == 0)
- return;
- glDeleteShader(handle);
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
@@ -141,25 +109,10 @@ public:
/// Creates a new internal OpenGL resource and stores the handle
void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
- bool separable_program = false) {
- OGLShader vert, geo, frag;
- if (vert_shader)
- vert.Create(vert_shader, GL_VERTEX_SHADER);
- if (geo_shader)
- geo.Create(geo_shader, GL_GEOMETRY_SHADER);
- if (frag_shader)
- frag.Create(frag_shader, GL_FRAGMENT_SHADER);
- Create(separable_program, vert.handle, geo.handle, frag.handle);
- }
+ bool separable_program = false);
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteProgram(handle);
- OpenGLState::GetCurState().ResetProgram(handle).Apply();
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
@@ -178,20 +131,10 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create() {
- if (handle != 0)
- return;
- glGenProgramPipelines(1, &handle);
- }
+ void Create();
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteProgramPipelines(1, &handle);
- OpenGLState::GetCurState().ResetPipeline(handle).Apply();
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
@@ -213,20 +156,10 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create() {
- if (handle != 0)
- return;
- glGenBuffers(1, &handle);
- }
+ void Create();
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteBuffers(1, &handle);
- OpenGLState::GetCurState().ResetBuffer(handle).Apply();
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
@@ -247,19 +180,10 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create() {
- if (handle != 0)
- return;
- handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- }
+ void Create();
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteSync(handle);
- handle = 0;
- }
+ void Release();
GLsync handle = 0;
};
@@ -281,20 +205,10 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create() {
- if (handle != 0)
- return;
- glGenVertexArrays(1, &handle);
- }
+ void Create();
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteVertexArrays(1, &handle);
- OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
@@ -316,20 +230,10 @@ public:
}
/// Creates a new internal OpenGL resource and stores the handle
- void Create() {
- if (handle != 0)
- return;
- glGenFramebuffers(1, &handle);
- }
+ void Create();
/// Deletes the internal OpenGL resource
- void Release() {
- if (handle == 0)
- return;
- glDeleteFramebuffers(1, &handle);
- OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
- handle = 0;
- }
+ void Release();
GLuint handle = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1a03a677f..a85a7c0c5 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -6,9 +6,10 @@
#include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
-#include "video_core/utils.h"
+#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
@@ -89,7 +90,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
shader.Create(program_result.first.c_str(), gl_type);
program.Create(true, shader.handle);
SetShaderUniformBlockBindings(program.handle);
- VideoCore::LabelGLObject(GL_PROGRAM, program.handle, addr);
+ LabelGLObject(GL_PROGRAM, program.handle, addr);
} else {
// Store shader's code to lazily build it on draw
geometry_programs.code = program_result.first;
@@ -120,20 +121,26 @@ GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
}
GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
- const std::string& glsl_topology,
+ const std::string& glsl_topology, u32 max_vertices,
const std::string& debug_name) {
if (target_program.handle != 0) {
return target_program.handle;
}
- const std::string source{geometry_programs.code + "layout (" + glsl_topology + ") in;\n"};
+ std::string source = "#version 430 core\n";
+ source += "layout (" + glsl_topology + ") in;\n";
+ source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
+ source += geometry_programs.code;
+
OGLShader shader;
shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
target_program.Create(true, shader.handle);
SetShaderUniformBlockBindings(target_program.handle);
- VideoCore::LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
+ LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
return target_program.handle;
};
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
+
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const VAddr program_addr{GetShaderAddress(program)};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a210f1731..ffbf21831 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -16,6 +16,8 @@
namespace OpenGL {
class CachedShader;
+class RasterizerOpenGL;
+
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -46,22 +48,23 @@ public:
}
switch (primitive_mode) {
case GL_POINTS:
- return LazyGeometryProgram(geometry_programs.points, "points", "ShaderPoints");
+ return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
case GL_LINES:
case GL_LINE_STRIP:
- return LazyGeometryProgram(geometry_programs.lines, "lines", "ShaderLines");
+ return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
case GL_LINES_ADJACENCY:
case GL_LINE_STRIP_ADJACENCY:
- return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency",
+ return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
"ShaderLinesAdjacency");
case GL_TRIANGLES:
case GL_TRIANGLE_STRIP:
case GL_TRIANGLE_FAN:
- return LazyGeometryProgram(geometry_programs.triangles, "triangles", "ShaderTriangles");
+ return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
+ "ShaderTriangles");
case GL_TRIANGLES_ADJACENCY:
case GL_TRIANGLE_STRIP_ADJACENCY:
return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
- "ShaderLines");
+ 6, "ShaderTrianglesAdjacency");
default:
UNREACHABLE_MSG("Unknown primitive mode.");
}
@@ -76,7 +79,7 @@ public:
private:
/// Generates a geometry shader or returns one that already exists.
GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
- const std::string& debug_name);
+ u32 max_vertices, const std::string& debug_name);
VAddr addr;
Maxwell::ShaderProgram program_type;
@@ -104,6 +107,8 @@ private:
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public:
+ explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer);
+
/// Gets the current specified shader stage program
Shader GetStageProgram(Maxwell::ShaderProgram program);
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index fe4d1bd83..5fde22ad4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -3,11 +3,12 @@
// Refer to the license.txt file included.
#include <map>
+#include <optional>
#include <set>
#include <string>
#include <string_view>
+#include <unordered_set>
-#include <boost/optional.hpp>
#include <fmt/format.h>
#include "common/assert.h"
@@ -143,7 +144,7 @@ private:
for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
const Instruction instr = {program_code[offset]};
if (const auto opcode = OpCode::Decode(instr)) {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
// The EXIT instruction can be predicated, which means that the shader can
// conditionally end on this instruction. We have to consider the case where the
@@ -276,7 +277,8 @@ public:
GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
const Tegra::Shader::Header& header)
- : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header} {
+ : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
+ fixed_pipeline_output_attributes_used{}, local_memory_size{0} {
BuildRegisterList();
BuildInputList();
}
@@ -339,10 +341,10 @@ public:
*/
void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
u64 dest_num_components, u64 value_num_components,
- bool is_saturated = false, u64 dest_elem = 0) {
+ bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {
SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
- dest_num_components, value_num_components, dest_elem);
+ dest_num_components, value_num_components, dest_elem, precise);
}
/**
@@ -366,11 +368,12 @@ public:
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
- dest_num_components, value_num_components, dest_elem);
+ dest_num_components, value_num_components, dest_elem, false);
if (sets_cc) {
const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
+ LOG_WARNING(HW_GPU, "Control Codes Imcomplete.");
}
}
@@ -414,7 +417,7 @@ public:
}
}();
- SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem);
+ SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
}
/**
@@ -428,12 +431,31 @@ public:
*/
void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
const Tegra::Shader::IpaMode& input_mode,
- boost::optional<Register> vertex = {}) {
+ std::optional<Register> vertex = {}) {
const std::string dest = GetRegisterAsFloat(reg);
const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem);
shader.AddLine(dest + " = " + src + ';');
}
+ std::string GetLocalMemoryAsFloat(const std::string& index) {
+ return "lmem[" + index + ']';
+ }
+
+ std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
+ const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"};
+ return func + "(lmem[" + index + "])";
+ }
+
+ void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) {
+ shader.AddLine("lmem[" + index + "] = " + value + ';');
+ }
+
+ void SetLocalMemoryAsInteger(const std::string& index, const std::string& value,
+ bool is_signed = false) {
+ const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
+ shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
+ }
+
std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
switch (cc) {
case Tegra::Shader::ControlCode::NEU:
@@ -472,15 +494,20 @@ public:
// instruction for now.
if (stage == Maxwell3D::Regs::ShaderStage::Geometry) {
// TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry
- // shader. These instructions use a dirty register as buffer index. To avoid some
- // drivers from complaining for the out of boundary writes, guard them.
- const std::string buf_index{"min(" + GetRegisterAsInteger(buf_reg) + ", " +
- std::to_string(MAX_GEOMETRY_BUFFERS - 1) + ')'};
+ // shader. These instructions use a dirty register as buffer index, to avoid some
+ // drivers from complaining about out of boundary writes, guard them.
+ const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " +
+ std::to_string(MAX_GEOMETRY_BUFFERS) + ')'};
shader.AddLine("amem[" + buf_index + "][" +
std::to_string(static_cast<u32>(attribute)) + ']' +
GetSwizzle(elem) + " = " + src + ';');
} else {
- shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
+ if (attribute == Attribute::Index::PointSize) {
+ fixed_pipeline_output_attributes_used.insert(attribute);
+ shader.AddLine(dest + " = " + src + ';');
+ } else {
+ shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';');
+ }
}
}
}
@@ -524,7 +551,9 @@ public:
/// Add declarations.
void GenerateDeclarations(const std::string& suffix) {
+ GenerateVertex();
GenerateRegisters(suffix);
+ GenerateLocalMemory();
GenerateInternalFlags();
GenerateInputAttrs();
GenerateOutputAttrs();
@@ -570,6 +599,10 @@ public:
return entry.GetName();
}
+ void SetLocalMemory(u64 lmem) {
+ local_memory_size = lmem;
+ }
+
private:
/// Generates declarations for registers.
void GenerateRegisters(const std::string& suffix) {
@@ -580,6 +613,15 @@ private:
declarations.AddNewLine();
}
+ /// Generates declarations for local memory.
+ void GenerateLocalMemory() {
+ if (local_memory_size > 0) {
+ declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) +
+ "];");
+ declarations.AddNewLine();
+ }
+ }
+
/// Generates declarations for internal flags.
void GenerateInternalFlags() {
for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
@@ -683,6 +725,20 @@ private:
declarations.AddNewLine();
}
+ void GenerateVertex() {
+ if (stage != Maxwell3D::Regs::ShaderStage::Vertex)
+ return;
+ declarations.AddLine("out gl_PerVertex {");
+ ++declarations.scope;
+ declarations.AddLine("vec4 gl_Position;");
+ for (auto& o : fixed_pipeline_output_attributes_used) {
+ if (o == Attribute::Index::PointSize)
+ declarations.AddLine("float gl_PointSize;");
+ }
+ --declarations.scope;
+ declarations.AddLine("};");
+ }
+
/// Generates code representing a temporary (GPR) register.
std::string GetRegister(const Register& reg, unsigned elem) {
if (reg == Register::ZeroIndex) {
@@ -702,7 +758,8 @@ private:
* @param dest_elem Optional, the destination element to use for the operation.
*/
void SetRegister(const Register& reg, u64 elem, const std::string& value,
- u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
+ u64 dest_num_components, u64 value_num_components, u64 dest_elem,
+ bool precise) {
if (reg == Register::ZeroIndex) {
LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");
UNREACHABLE();
@@ -719,7 +776,18 @@ private:
src += GetSwizzle(elem);
}
- shader.AddLine(dest + " = " + src + ';');
+ if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
+ shader.AddLine('{');
+ ++shader.scope;
+ // This avoids optimizations of constant propagation and keeps the code as the original
+ // Sadly using the precise keyword causes "linking" errors on fragment shaders.
+ shader.AddLine("precise float tmp = " + src + ';');
+ shader.AddLine(dest + " = tmp;");
+ --shader.scope;
+ shader.AddLine('}');
+ } else {
+ shader.AddLine(dest + " = " + src + ';');
+ }
}
/// Build the GLSL register list.
@@ -740,10 +808,14 @@ private:
/// Generates code representing an input attribute register.
std::string GetInputAttribute(Attribute::Index attribute,
const Tegra::Shader::IpaMode& input_mode,
- boost::optional<Register> vertex = {}) {
+ std::optional<Register> vertex = {}) {
auto GeometryPass = [&](const std::string& name) {
if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) {
- return "gs_" + name + '[' + GetRegisterAsInteger(vertex.value(), 0, false) + ']';
+ // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set
+ // an 0x80000000 index for those and the shader fails to build. Find out why this
+ // happens and what's its intent.
+ return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) +
+ " % MAX_VERTEX_INPUT]";
}
return name;
};
@@ -836,6 +908,8 @@ private:
/// Generates code representing the declaration name of an output attribute register.
std::string GetOutputAttribute(Attribute::Index attribute) {
switch (attribute) {
+ case Attribute::Index::PointSize:
+ return "gl_PointSize";
case Attribute::Index::Position:
return "position";
default:
@@ -870,6 +944,8 @@ private:
const Maxwell3D::Regs::ShaderStage& stage;
const std::string& suffix;
const Tegra::Shader::Header& header;
+ std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
+ u64 local_memory_size;
};
class GLSLGenerator {
@@ -879,6 +955,8 @@ public:
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
stage(stage), suffix(suffix) {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
+ local_memory_size = header.GetLocalMemorySize();
+ regs.SetLocalMemory(local_memory_size);
Generate(suffix);
}
@@ -1392,7 +1470,7 @@ private:
}
shader.AddLine(
- fmt::format("// {}: {} (0x{:016x})", offset, opcode->GetName(), instr.value));
+ fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value));
using Tegra::Shader::Pred;
ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
@@ -1400,7 +1478,7 @@ private:
// Some instructions (like SSY) don't have a predicate field, they are always
// unconditionally executed.
- bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId());
+ bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
shader.AddLine("if (" +
@@ -1410,7 +1488,7 @@ private:
++shader.scope;
}
- switch (opcode->GetType()) {
+ switch (opcode->get().GetType()) {
case OpCode::Type::Arithmetic: {
std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
@@ -1427,7 +1505,7 @@ private:
}
}
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::MOV_C:
case OpCode::Id::MOV_R: {
// MOV does not have neither 'abs' nor 'neg' bits.
@@ -1449,8 +1527,13 @@ private:
ASSERT_MSG(instr.fmul.cc == 0, "FMUL cc is not implemented");
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
+
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "FMUL Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::FADD_C:
@@ -1458,8 +1541,13 @@ private:
case OpCode::Id::FADD_IMM: {
op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "FADD Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::MUFU: {
@@ -1467,31 +1555,31 @@ private:
switch (instr.sub_op) {
case SubOp::Cos:
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Sin:
regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Ex2:
regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Lg2:
regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Rcp:
regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Rsq:
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
case SubOp::Sqrt:
regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
- instr.alu.saturate_d);
+ instr.alu.saturate_d, 0, true);
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -1512,7 +1600,11 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0,
'(' + condition + ") ? min(" + parameters + ") : max(" +
parameters + ')',
- 1, 1);
+ 1, 1, false, 0, true);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "FMNMX Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::RRO_C:
@@ -1525,14 +1617,15 @@ private:
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}",
+ opcode->get().GetName());
UNREACHABLE();
}
}
break;
}
case OpCode::Type::ArithmeticImmediate: {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::MOV32_IMM: {
regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
break;
@@ -1541,7 +1634,11 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0,
regs.GetRegisterAsFloat(instr.gpr8) + " * " +
GetImmediate32(instr),
- 1, 1, instr.fmul32.saturate);
+ 1, 1, instr.fmul32.saturate, 0, true);
+ if (instr.op_32.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "FMUL32 Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::FADD32I: {
@@ -1564,7 +1661,11 @@ private:
op_b = "-(" + op_b + ')';
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
+ if (instr.op_32.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "FADD32 Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
}
@@ -1576,7 +1677,7 @@ private:
std::string op_a = instr.bfe.negate_a ? "-" : "";
op_a += regs.GetRegisterAsInteger(instr.gpr8);
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::BFE_IMM: {
std::string inner_shift =
'(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
@@ -1585,10 +1686,14 @@ private:
std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "BFE Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->get().GetName());
UNREACHABLE();
}
}
@@ -1610,7 +1715,7 @@ private:
}
}
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::SHR_C:
case OpCode::Id::SHR_R:
case OpCode::Id::SHR_IMM: {
@@ -1622,15 +1727,23 @@ private:
// Cast to int is superfluous for arithmetic shift, it's only for a logical shift
regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
1, 1);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "SHR Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::SHL_C:
case OpCode::Id::SHL_R:
case OpCode::Id::SHL_IMM:
regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "SHL Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->get().GetName());
UNREACHABLE();
}
}
@@ -1640,13 +1753,17 @@ private:
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
std::string op_b = std::to_string(instr.alu.imm20_32.Value());
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::IADD32I:
if (instr.iadd32i.negate_a)
op_a = "-(" + op_a + ')';
regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
instr.iadd32i.saturate != 0);
+ if (instr.op_32.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "IADD32 Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
case OpCode::Id::LOP32I: {
if (instr.alu.lop32i.invert_a)
@@ -1658,11 +1775,15 @@ private:
WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
Tegra::Shader::PredicateResultMode::None,
Tegra::Shader::Pred::UnusedIndex);
+ if (instr.op_32.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "LOP32I Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
- opcode->GetName());
+ opcode->get().GetName());
UNREACHABLE();
}
}
@@ -1682,7 +1803,7 @@ private:
}
}
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::IADD_C:
case OpCode::Id::IADD_R:
case OpCode::Id::IADD_IMM: {
@@ -1694,6 +1815,10 @@ private:
regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
instr.alu.saturate_d);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "IADD Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::IADD3_C:
@@ -1718,7 +1843,7 @@ private:
}
};
- if (opcode->GetId() == OpCode::Id::IADD3_R) {
+ if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
apply_height(instr.iadd3.height_a, op_a);
apply_height(instr.iadd3.height_b, op_b);
apply_height(instr.iadd3.height_c, op_c);
@@ -1734,7 +1859,7 @@ private:
op_c = "-(" + op_c + ')';
std::string result;
- if (opcode->GetId() == OpCode::Id::IADD3_R) {
+ if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
switch (instr.iadd3.mode) {
case Tegra::Shader::IAdd3Mode::RightShift:
// TODO(tech4me): According to
@@ -1755,6 +1880,11 @@ private:
}
regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1);
+
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "IADD3 Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::ISCADD_C:
@@ -1770,6 +1900,10 @@ private:
regs.SetRegisterToInteger(instr.gpr0, true, 0,
"((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "ISCADD Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::POPC_C:
@@ -1801,6 +1935,10 @@ private:
WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "LOP Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::LOP3_C:
@@ -1809,13 +1947,17 @@ private:
const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
std::string lut;
- if (opcode->GetId() == OpCode::Id::LOP3_R) {
+ if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
} else {
lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')';
}
WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "LOP3 Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::IMNMX_C:
@@ -1830,6 +1972,10 @@ private:
'(' + condition + ") ? min(" + parameters + ") : max(" +
parameters + ')',
1, 1);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "IMNMX Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::LEA_R2:
@@ -1839,7 +1985,7 @@ private:
case OpCode::Id::LEA_HI: {
std::string op_c;
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::LEA_R2: {
op_a = regs.GetRegisterAsInteger(instr.gpr20);
op_b = regs.GetRegisterAsInteger(instr.gpr39);
@@ -1884,7 +2030,8 @@ private:
op_b = regs.GetRegisterAsInteger(instr.gpr8);
op_a = std::to_string(instr.lea.imm.entry_a);
op_c = std::to_string(instr.lea.imm.entry_b);
- LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}",
+ opcode->get().GetName());
UNREACHABLE();
}
}
@@ -1899,7 +2046,7 @@ private:
}
default: {
LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
- opcode->GetName());
+ opcode->get().GetName());
UNREACHABLE();
}
}
@@ -1907,20 +2054,21 @@ private:
break;
}
case OpCode::Type::ArithmeticHalf: {
- if (opcode->GetId() == OpCode::Id::HADD2_C || opcode->GetId() == OpCode::Id::HADD2_R) {
+ if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
+ opcode->get().GetId() == OpCode::Id::HADD2_R) {
ASSERT_MSG(instr.alu_half.ftz == 0, "Unimplemented");
}
const bool negate_a =
- opcode->GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
+ opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
const bool negate_b =
- opcode->GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
+ opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
const std::string op_a =
GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a,
instr.alu_half.abs_a != 0, negate_a);
std::string op_b;
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
@@ -1938,7 +2086,7 @@ private:
op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b);
const std::string result = [&]() {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HADD2_R:
return '(' + op_a + " + " + op_b + ')';
@@ -1946,7 +2094,8 @@ private:
case OpCode::Id::HMUL2_R:
return '(' + op_a + " * " + op_b + ')';
default:
- LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}",
+ opcode->get().GetName());
UNREACHABLE();
return std::string("0");
}
@@ -1957,7 +2106,7 @@ private:
break;
}
case OpCode::Type::ArithmeticHalfImmediate: {
- if (opcode->GetId() == OpCode::Id::HADD2_IMM) {
+ if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
ASSERT_MSG(instr.alu_half_imm.ftz == 0, "Unimplemented");
} else {
ASSERT_MSG(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None,
@@ -1971,7 +2120,7 @@ private:
const std::string op_b = UnpackHalfImmediate(instr, true);
const std::string result = [&]() {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_IMM:
return op_a + " + " + op_b;
case OpCode::Id::HMUL2_IMM:
@@ -1997,7 +2146,7 @@ private:
ASSERT_MSG(instr.ffma.tab5980_1 == 0, "FFMA tab5980_1({}) not implemented",
instr.ffma.tab5980_1.Value());
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::FFMA_CR: {
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
GLSLRegister::Type::Float);
@@ -2021,24 +2170,29 @@ private:
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->get().GetName());
UNREACHABLE();
}
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
- instr.alu.saturate_d);
+ regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
+ 1, 1, instr.alu.saturate_d, 0, true);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "FFMA Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
+
break;
}
case OpCode::Type::Hfma2: {
- if (opcode->GetId() == OpCode::Id::HFMA2_RR) {
+ if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None,
"Unimplemented");
} else {
ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None,
"Unimplemented");
}
- const bool saturate = opcode->GetId() == OpCode::Id::HFMA2_RR
+ const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR
? instr.hfma2.rr.saturate != 0
: instr.hfma2.saturate != 0;
@@ -2046,7 +2200,7 @@ private:
GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a);
std::string op_b, op_c;
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::HFMA2_CR:
op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
GLSLRegister::Type::UnsignedInteger),
@@ -2084,7 +2238,7 @@ private:
break;
}
case OpCode::Type::Conversion: {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::I2I_R: {
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
@@ -2132,6 +2286,11 @@ private:
}
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "I2F Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::F2F_R: {
@@ -2170,6 +2329,11 @@ private:
}
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
+
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "F2F Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::F2I_R:
@@ -2219,17 +2383,22 @@ private:
regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
1, false, 0, instr.conversion.dest_size);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "F2I Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}",
+ opcode->get().GetName());
UNREACHABLE();
}
}
break;
}
case OpCode::Type::Memory: {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::LD_A: {
// Note: Shouldn't this be interp mode flat? As in no interpolation made.
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
@@ -2299,6 +2468,39 @@ private:
shader.AddLine("}");
break;
}
+ case OpCode::Id::LD_L: {
+ // Add an extra scope and declare the index register inside to prevent
+ // overwriting it in case it is used as an output of the LD instruction.
+ shader.AddLine('{');
+ ++shader.scope;
+
+ std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
+ std::to_string(instr.smem_imm.Value()) + ')';
+
+ shader.AddLine("uint index = (" + op + " / 4);");
+
+ const std::string op_a = regs.GetLocalMemoryAsFloat("index");
+
+ if (instr.ld_l.unknown != 1) {
+ LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}",
+ static_cast<unsigned>(instr.ld_l.unknown.Value()));
+ UNREACHABLE();
+ }
+
+ switch (instr.ldst_sl.type.Value()) {
+ case Tegra::Shader::StoreType::Bytes32:
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}",
+ static_cast<unsigned>(instr.ldst_sl.type.Value()));
+ UNREACHABLE();
+ }
+
+ --shader.scope;
+ shader.AddLine('}');
+ break;
+ }
case OpCode::Id::ST_A: {
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
"Indirect attribute loads are not supported");
@@ -2327,6 +2529,37 @@ private:
break;
}
+ case OpCode::Id::ST_L: {
+ // Add an extra scope and declare the index register inside to prevent
+ // overwriting it in case it is used as an output of the LD instruction.
+ shader.AddLine('{');
+ ++shader.scope;
+
+ std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
+ std::to_string(instr.smem_imm.Value()) + ')';
+
+ shader.AddLine("uint index = (" + op + " / 4);");
+
+ if (instr.st_l.unknown != 0) {
+ LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}",
+ static_cast<unsigned>(instr.st_l.unknown.Value()));
+ UNREACHABLE();
+ }
+
+ switch (instr.ldst_sl.type.Value()) {
+ case Tegra::Shader::StoreType::Bytes32:
+ regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0));
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}",
+ static_cast<unsigned>(instr.ldst_sl.type.Value()));
+ UNREACHABLE();
+ }
+
+ --shader.scope;
+ shader.AddLine('}');
+ break;
+ }
case OpCode::Id::TEX: {
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
std::string coord;
@@ -2513,12 +2746,12 @@ private:
}
case 3: {
if (is_array) {
- UNIMPLEMENTED_MSG("3-coordinate arrays not fully implemented");
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
- texture_type = Tegra::Shader::TextureType::Texture2D;
- is_array = false;
+ const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+ const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
+ const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
+ coord =
+ "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index + ");";
} else {
const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
@@ -2548,7 +2781,11 @@ private:
break;
}
case Tegra::Shader::TextureProcessMode::LZ: {
- texture = "textureLod(" + sampler + ", coords, 0.0)";
+ if (depth_compare && is_array) {
+ texture = "texture(" + sampler + ", coords)";
+ } else {
+ texture = "textureLod(" + sampler + ", coords, 0.0)";
+ }
break;
}
case Tegra::Shader::TextureProcessMode::LL: {
@@ -2809,7 +3046,7 @@ private:
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->get().GetName());
UNREACHABLE();
}
}
@@ -2903,7 +3140,7 @@ private:
instr.hsetp2.abs_a, instr.hsetp2.negate_a);
const std::string op_b = [&]() {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::HSETP2_R:
return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
instr.hsetp2.type_b, instr.hsetp2.abs_a,
@@ -2962,10 +3199,15 @@ private:
regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
}
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "PSET Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
+
break;
}
case OpCode::Type::PredicateSetPredicate: {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::PSETP: {
const std::string op_a =
GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
@@ -3011,7 +3253,8 @@ private:
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}",
+ opcode->get().GetName());
UNREACHABLE();
}
}
@@ -3099,7 +3342,7 @@ private:
instr.hset2.abs_a != 0, instr.hset2.negate_a != 0);
const std::string op_b = [&]() {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::HSET2_R:
return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
instr.hset2.type_b, instr.hset2.abs_b != 0,
@@ -3148,7 +3391,7 @@ private:
const bool is_signed{instr.xmad.sign_a == 1};
bool is_merge{};
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR: {
is_merge = instr.xmad.merge_56;
op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
@@ -3177,7 +3420,7 @@ private:
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->get().GetName());
UNREACHABLE();
}
}
@@ -3226,15 +3469,25 @@ private:
}
regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "XMAD Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
break;
}
default: {
- switch (opcode->GetId()) {
+ switch (opcode->get().GetId()) {
case OpCode::Id::EXIT: {
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
EmitFragmentOutputsWrite();
}
+ const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+ if (cc != Tegra::Shader::ControlCode::T) {
+ LOG_CRITICAL(HW_GPU, "EXIT Control Code used: {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ }
+
switch (instr.flow.cond) {
case Tegra::Shader::FlowCondition::Always:
shader.AddLine("return true;");
@@ -3264,6 +3517,11 @@ private:
// Enclose "discard" in a conditional, so that GLSL compilation does not complain
// about unexecuted instructions that may follow this.
+ const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+ if (cc != Tegra::Shader::ControlCode::T) {
+ LOG_CRITICAL(HW_GPU, "KIL Control Code used: {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ }
shader.AddLine("if (true) {");
++shader.scope;
shader.AddLine("discard;");
@@ -3321,6 +3579,11 @@ private:
case OpCode::Id::BRA: {
ASSERT_MSG(instr.bra.constant_buffer == 0,
"BRA with constant buffers are not implemented");
+ const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+ if (cc != Tegra::Shader::ControlCode::T) {
+ LOG_CRITICAL(HW_GPU, "BRA Control Code used: {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ }
const u32 target = offset + instr.bra.GetBranchTarget();
shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
break;
@@ -3361,13 +3624,21 @@ private:
}
case OpCode::Id::SYNC: {
// The SYNC opcode jumps to the address previously set by the SSY opcode
- ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+ const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+ if (cc != Tegra::Shader::ControlCode::T) {
+ LOG_CRITICAL(HW_GPU, "SYNC Control Code used: {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ }
EmitPopFromFlowStack();
break;
}
case OpCode::Id::BRK: {
// The BRK opcode jumps to the address previously set by the PBK opcode
- ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+ const Tegra::Shader::ControlCode cc = instr.flow_control_code;
+ if (cc != Tegra::Shader::ControlCode::T) {
+ LOG_CRITICAL(HW_GPU, "BRK Control Code used: {}", static_cast<u32>(cc));
+ UNREACHABLE();
+ }
EmitPopFromFlowStack();
break;
}
@@ -3397,6 +3668,11 @@ private:
regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
instr.vmad.saturate == 1, 0, Register::Size::Word,
instr.vmad.cc);
+ if (instr.generates_cc) {
+ LOG_CRITICAL(HW_GPU, "VMAD Generates an unhandled Control Code");
+ UNREACHABLE();
+ }
+
break;
}
case OpCode::Id::VSETP: {
@@ -3424,7 +3700,7 @@ private:
break;
}
default: {
- LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
+ LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->get().GetName());
UNREACHABLE();
}
}
@@ -3550,6 +3826,7 @@ private:
const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage;
const std::string& suffix;
+ u64 local_memory_size;
ShaderWriter shader;
ShaderWriter declarations;
@@ -3564,9 +3841,9 @@ std::string GetCommonDeclarations() {
RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4));
}
-boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
- Maxwell3D::Regs::ShaderStage stage,
- const std::string& suffix) {
+std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+ Maxwell3D::Regs::ShaderStage stage,
+ const std::string& suffix) {
try {
const auto subroutines =
ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
@@ -3575,7 +3852,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
} catch (const DecompileFail& exception) {
LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
}
- return boost::none;
+ return {};
}
} // namespace OpenGL::GLShader::Decompiler
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index b20cc4bfa..d01a4a7ee 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -6,8 +6,8 @@
#include <array>
#include <functional>
+#include <optional>
#include <string>
-#include <boost/optional.hpp>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -18,8 +18,8 @@ using Tegra::Engines::Maxwell3D;
std::string GetCommonDeclarations();
-boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
- Maxwell3D::Regs::ShaderStage stage,
- const std::string& suffix);
+std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
+ Maxwell3D::Regs::ShaderStage stage,
+ const std::string& suffix);
} // namespace OpenGL::GLShader::Decompiler
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index e883ffb1d..eea090e52 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -19,9 +19,6 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
out += Decompiler::GetCommonDeclarations();
out += R"(
-out gl_PerVertex {
- vec4 gl_Position;
-};
layout (location = 0) out vec4 position;
@@ -40,7 +37,7 @@ layout(std140) uniform vs_config {
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
- .get_value_or({});
+ .value_or(ProgramResult());
out += program.first;
@@ -48,7 +45,7 @@ layout(std140) uniform vs_config {
ProgramResult program_b =
Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
- .get_value_or({});
+ .value_or(ProgramResult());
out += program_b.first;
}
@@ -85,15 +82,15 @@ void main() {
}
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
- std::string out = "#version 430 core\n";
- out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+ // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
+ std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations();
out += "bool exec_geometry();\n";
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Geometry, "geometry")
- .get_value_or({});
+ .value_or(ProgramResult());
out += R"(
out gl_PerVertex {
vec4 gl_Position;
@@ -127,7 +124,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
- .get_value_or({});
+ .value_or(ProgramResult());
out += R"(
layout(location = 0) out vec4 FragColor0;
layout(location = 1) out vec4 FragColor1;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 36fe1f04c..2a069cdd8 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -7,6 +7,7 @@
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL::GLShader {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 1fe26a2a9..98622a058 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -11,9 +11,10 @@
namespace OpenGL {
OpenGLState OpenGLState::cur_state;
-
+bool OpenGLState::s_rgb_used;
OpenGLState::OpenGLState() {
// These all match default OpenGL values
+ framebuffer_srgb.enabled = false;
cull.enabled = false;
cull.mode = GL_BACK;
cull.front_face = GL_CCW;
@@ -22,11 +23,14 @@ OpenGLState::OpenGLState() {
depth.test_func = GL_LESS;
depth.write_mask = GL_TRUE;
- color_mask.red_enabled = GL_TRUE;
- color_mask.green_enabled = GL_TRUE;
- color_mask.blue_enabled = GL_TRUE;
- color_mask.alpha_enabled = GL_TRUE;
-
+ primitive_restart.enabled = false;
+ primitive_restart.index = 0;
+ for (auto& item : color_mask) {
+ item.red_enabled = GL_TRUE;
+ item.green_enabled = GL_TRUE;
+ item.blue_enabled = GL_TRUE;
+ item.alpha_enabled = GL_TRUE;
+ }
stencil.test_enabled = false;
auto reset_stencil = [](auto& config) {
config.test_func = GL_ALWAYS;
@@ -39,19 +43,33 @@ OpenGLState::OpenGLState() {
};
reset_stencil(stencil.front);
reset_stencil(stencil.back);
-
- blend.enabled = true;
- blend.rgb_equation = GL_FUNC_ADD;
- blend.a_equation = GL_FUNC_ADD;
- blend.src_rgb_func = GL_ONE;
- blend.dst_rgb_func = GL_ZERO;
- blend.src_a_func = GL_ONE;
- blend.dst_a_func = GL_ZERO;
- blend.color.red = 0.0f;
- blend.color.green = 0.0f;
- blend.color.blue = 0.0f;
- blend.color.alpha = 0.0f;
-
+ for (auto& item : viewports) {
+ item.x = 0;
+ item.y = 0;
+ item.width = 0;
+ item.height = 0;
+ item.depth_range_near = 0.0f;
+ item.depth_range_far = 1.0f;
+ }
+ scissor.enabled = false;
+ scissor.x = 0;
+ scissor.y = 0;
+ scissor.width = 0;
+ scissor.height = 0;
+ for (auto& item : blend) {
+ item.enabled = true;
+ item.rgb_equation = GL_FUNC_ADD;
+ item.a_equation = GL_FUNC_ADD;
+ item.src_rgb_func = GL_ONE;
+ item.dst_rgb_func = GL_ZERO;
+ item.src_a_func = GL_ONE;
+ item.dst_a_func = GL_ZERO;
+ }
+ independant_blend.enabled = false;
+ blend_color.red = 0.0f;
+ blend_color.green = 0.0f;
+ blend_color.blue = 0.0f;
+ blend_color.alpha = 0.0f;
logic_op.enabled = false;
logic_op.operation = GL_COPY;
@@ -67,138 +85,309 @@ OpenGLState::OpenGLState() {
draw.shader_program = 0;
draw.program_pipeline = 0;
- scissor.enabled = false;
- scissor.x = 0;
- scissor.y = 0;
- scissor.width = 0;
- scissor.height = 0;
-
- viewport.x = 0;
- viewport.y = 0;
- viewport.width = 0;
- viewport.height = 0;
-
clip_distance = {};
point.size = 1;
}
-void OpenGLState::Apply() const {
+void OpenGLState::ApplyDefaultState() {
+ glDisable(GL_FRAMEBUFFER_SRGB);
+ glDisable(GL_CULL_FACE);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_PRIMITIVE_RESTART);
+ glDisable(GL_STENCIL_TEST);
+ glEnable(GL_BLEND);
+ glDisable(GL_COLOR_LOGIC_OP);
+ glDisable(GL_SCISSOR_TEST);
+}
+
+void OpenGLState::ApplySRgb() const {
+ // sRGB
+ if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
+ if (framebuffer_srgb.enabled) {
+ // Track if sRGB is used
+ s_rgb_used = true;
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
+ }
+ }
+}
+
+void OpenGLState::ApplyCulling() const {
// Culling
- if (cull.enabled != cur_state.cull.enabled) {
+ const bool cull_changed = cull.enabled != cur_state.cull.enabled;
+ if (cull_changed) {
if (cull.enabled) {
glEnable(GL_CULL_FACE);
} else {
glDisable(GL_CULL_FACE);
}
}
+ if (cull.enabled) {
+ if (cull_changed || cull.mode != cur_state.cull.mode) {
+ glCullFace(cull.mode);
+ }
- if (cull.mode != cur_state.cull.mode) {
- glCullFace(cull.mode);
+ if (cull_changed || cull.front_face != cur_state.cull.front_face) {
+ glFrontFace(cull.front_face);
+ }
}
+}
- if (cull.front_face != cur_state.cull.front_face) {
- glFrontFace(cull.front_face);
+void OpenGLState::ApplyColorMask() const {
+ if (GLAD_GL_ARB_viewport_array) {
+ for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+ const auto& updated = color_mask[i];
+ const auto& current = cur_state.color_mask[i];
+ if (updated.red_enabled != current.red_enabled ||
+ updated.green_enabled != current.green_enabled ||
+ updated.blue_enabled != current.blue_enabled ||
+ updated.alpha_enabled != current.alpha_enabled) {
+ glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+ updated.blue_enabled, updated.alpha_enabled);
+ }
+ }
+ } else {
+ const auto& updated = color_mask[0];
+ const auto& current = cur_state.color_mask[0];
+ if (updated.red_enabled != current.red_enabled ||
+ updated.green_enabled != current.green_enabled ||
+ updated.blue_enabled != current.blue_enabled ||
+ updated.alpha_enabled != current.alpha_enabled) {
+ glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
+ updated.alpha_enabled);
+ }
}
+}
+void OpenGLState::ApplyDepth() const {
// Depth test
- if (depth.test_enabled != cur_state.depth.test_enabled) {
+ const bool depth_test_changed = depth.test_enabled != cur_state.depth.test_enabled;
+ if (depth_test_changed) {
if (depth.test_enabled) {
glEnable(GL_DEPTH_TEST);
} else {
glDisable(GL_DEPTH_TEST);
}
}
-
- if (depth.test_func != cur_state.depth.test_func) {
+ if (depth.test_enabled &&
+ (depth_test_changed || depth.test_func != cur_state.depth.test_func)) {
glDepthFunc(depth.test_func);
}
-
// Depth mask
if (depth.write_mask != cur_state.depth.write_mask) {
glDepthMask(depth.write_mask);
}
+}
- // Color mask
- if (color_mask.red_enabled != cur_state.color_mask.red_enabled ||
- color_mask.green_enabled != cur_state.color_mask.green_enabled ||
- color_mask.blue_enabled != cur_state.color_mask.blue_enabled ||
- color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) {
- glColorMask(color_mask.red_enabled, color_mask.green_enabled, color_mask.blue_enabled,
- color_mask.alpha_enabled);
+void OpenGLState::ApplyPrimitiveRestart() const {
+ const bool primitive_restart_changed =
+ primitive_restart.enabled != cur_state.primitive_restart.enabled;
+ if (primitive_restart_changed) {
+ if (primitive_restart.enabled) {
+ glEnable(GL_PRIMITIVE_RESTART);
+ } else {
+ glDisable(GL_PRIMITIVE_RESTART);
+ }
+ }
+ if (primitive_restart_changed ||
+ (primitive_restart.enabled &&
+ primitive_restart.index != cur_state.primitive_restart.index)) {
+ glPrimitiveRestartIndex(primitive_restart.index);
}
+}
- // Stencil test
- if (stencil.test_enabled != cur_state.stencil.test_enabled) {
+void OpenGLState::ApplyStencilTest() const {
+ const bool stencil_test_changed = stencil.test_enabled != cur_state.stencil.test_enabled;
+ if (stencil_test_changed) {
if (stencil.test_enabled) {
glEnable(GL_STENCIL_TEST);
} else {
glDisable(GL_STENCIL_TEST);
}
}
- auto config_stencil = [](GLenum face, const auto& config, const auto& prev_config) {
- if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
- config.test_mask != prev_config.test_mask) {
- glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
+ if (stencil.test_enabled) {
+ auto config_stencil = [stencil_test_changed](GLenum face, const auto& config,
+ const auto& prev_config) {
+ if (stencil_test_changed || config.test_func != prev_config.test_func ||
+ config.test_ref != prev_config.test_ref ||
+ config.test_mask != prev_config.test_mask) {
+ glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
+ }
+ if (stencil_test_changed || config.action_depth_fail != prev_config.action_depth_fail ||
+ config.action_depth_pass != prev_config.action_depth_pass ||
+ config.action_stencil_fail != prev_config.action_stencil_fail) {
+ glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
+ config.action_depth_pass);
+ }
+ if (config.write_mask != prev_config.write_mask) {
+ glStencilMaskSeparate(face, config.write_mask);
+ }
+ };
+ config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
+ config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
+ }
+}
+
+void OpenGLState::ApplyScissor() const {
+ const bool scissor_changed = scissor.enabled != cur_state.scissor.enabled;
+ if (scissor_changed) {
+ if (scissor.enabled) {
+ glEnable(GL_SCISSOR_TEST);
+ } else {
+ glDisable(GL_SCISSOR_TEST);
}
- if (config.action_depth_fail != prev_config.action_depth_fail ||
- config.action_depth_pass != prev_config.action_depth_pass ||
- config.action_stencil_fail != prev_config.action_stencil_fail) {
- glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
- config.action_depth_pass);
+ }
+ if (scissor.enabled &&
+ (scissor_changed || scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y ||
+ scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height)) {
+ glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
+ }
+}
+
+void OpenGLState::ApplyViewport() const {
+ if (GLAD_GL_ARB_viewport_array) {
+ for (GLuint i = 0;
+ i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); i++) {
+ const auto& current = cur_state.viewports[i];
+ const auto& updated = viewports[i];
+ if (updated.x != current.x || updated.y != current.y ||
+ updated.width != current.width || updated.height != current.height) {
+ glViewportIndexedf(i, updated.x, updated.y, updated.width, updated.height);
+ }
+ if (updated.depth_range_near != current.depth_range_near ||
+ updated.depth_range_far != current.depth_range_far) {
+ glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
+ }
}
- if (config.write_mask != prev_config.write_mask) {
- glStencilMaskSeparate(face, config.write_mask);
+ } else {
+ const auto& current = cur_state.viewports[0];
+ const auto& updated = viewports[0];
+ if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
+ updated.height != current.height) {
+ glViewport(static_cast<GLint>(updated.x), static_cast<GLint>(updated.y),
+ static_cast<GLsizei>(updated.width), static_cast<GLsizei>(updated.height));
}
- };
- config_stencil(GL_FRONT, stencil.front, cur_state.stencil.front);
- config_stencil(GL_BACK, stencil.back, cur_state.stencil.back);
+ if (updated.depth_range_near != current.depth_range_near ||
+ updated.depth_range_far != current.depth_range_far) {
+ glDepthRange(updated.depth_range_near, updated.depth_range_far);
+ }
+ }
+}
- // Blending
- if (blend.enabled != cur_state.blend.enabled) {
- if (blend.enabled) {
- ASSERT(!logic_op.enabled);
+void OpenGLState::ApplyGlobalBlending() const {
+ const Blend& current = cur_state.blend[0];
+ const Blend& updated = blend[0];
+ const bool blend_changed = updated.enabled != current.enabled;
+ if (blend_changed) {
+ if (updated.enabled) {
glEnable(GL_BLEND);
} else {
glDisable(GL_BLEND);
}
}
+ if (!updated.enabled) {
+ return;
+ }
+ if (updated.separate_alpha) {
+ if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+ updated.dst_rgb_func != current.dst_rgb_func ||
+ updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) {
+ glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+ updated.dst_a_func);
+ }
+
+ if (blend_changed || updated.rgb_equation != current.rgb_equation ||
+ updated.a_equation != current.a_equation) {
+ glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
+ }
+ } else {
+ if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+ updated.dst_rgb_func != current.dst_rgb_func) {
+ glBlendFunc(updated.src_rgb_func, updated.dst_rgb_func);
+ }
+
+ if (blend_changed || updated.rgb_equation != current.rgb_equation) {
+ glBlendEquation(updated.rgb_equation);
+ }
+ }
+}
- if (blend.color.red != cur_state.blend.color.red ||
- blend.color.green != cur_state.blend.color.green ||
- blend.color.blue != cur_state.blend.color.blue ||
- blend.color.alpha != cur_state.blend.color.alpha) {
- glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha);
+void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
+ const Blend& updated = blend[target];
+ const Blend& current = cur_state.blend[target];
+ const bool blend_changed = updated.enabled != current.enabled || force;
+ if (blend_changed) {
+ if (updated.enabled) {
+ glEnablei(GL_BLEND, static_cast<GLuint>(target));
+ } else {
+ glDisablei(GL_BLEND, static_cast<GLuint>(target));
+ }
+ }
+ if (!updated.enabled) {
+ return;
}
+ if (updated.separate_alpha) {
+ if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+ updated.dst_rgb_func != current.dst_rgb_func ||
+ updated.src_a_func != current.src_a_func || updated.dst_a_func != current.dst_a_func) {
+ glBlendFuncSeparateiARB(static_cast<GLuint>(target), updated.src_rgb_func,
+ updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
+ }
+
+ if (blend_changed || updated.rgb_equation != current.rgb_equation ||
+ updated.a_equation != current.a_equation) {
+ glBlendEquationSeparateiARB(static_cast<GLuint>(target), updated.rgb_equation,
+ updated.a_equation);
+ }
+ } else {
+ if (blend_changed || updated.src_rgb_func != current.src_rgb_func ||
+ updated.dst_rgb_func != current.dst_rgb_func) {
+ glBlendFunciARB(static_cast<GLuint>(target), updated.src_rgb_func,
+ updated.dst_rgb_func);
+ }
- if (blend.src_rgb_func != cur_state.blend.src_rgb_func ||
- blend.dst_rgb_func != cur_state.blend.dst_rgb_func ||
- blend.src_a_func != cur_state.blend.src_a_func ||
- blend.dst_a_func != cur_state.blend.dst_a_func) {
- glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func,
- blend.dst_a_func);
+ if (blend_changed || updated.rgb_equation != current.rgb_equation) {
+ glBlendEquationiARB(static_cast<GLuint>(target), updated.rgb_equation);
+ }
}
+}
- if (blend.rgb_equation != cur_state.blend.rgb_equation ||
- blend.a_equation != cur_state.blend.a_equation) {
- glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
+void OpenGLState::ApplyBlending() const {
+ if (independant_blend.enabled) {
+ for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
+ ApplyTargetBlending(i,
+ independant_blend.enabled != cur_state.independant_blend.enabled);
+ }
+ } else {
+ ApplyGlobalBlending();
+ }
+ if (blend_color.red != cur_state.blend_color.red ||
+ blend_color.green != cur_state.blend_color.green ||
+ blend_color.blue != cur_state.blend_color.blue ||
+ blend_color.alpha != cur_state.blend_color.alpha) {
+ glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
+}
- // Logic Operation
- if (logic_op.enabled != cur_state.logic_op.enabled) {
+void OpenGLState::ApplyLogicOp() const {
+ const bool logic_op_changed = logic_op.enabled != cur_state.logic_op.enabled;
+ if (logic_op_changed) {
if (logic_op.enabled) {
- ASSERT(!blend.enabled);
glEnable(GL_COLOR_LOGIC_OP);
} else {
glDisable(GL_COLOR_LOGIC_OP);
}
}
- if (logic_op.operation != cur_state.logic_op.operation) {
+ if (logic_op.enabled &&
+ (logic_op_changed || logic_op.operation != cur_state.logic_op.operation)) {
glLogicOp(logic_op.operation);
}
+}
- // Textures
+void OpenGLState::ApplyTextures() const {
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
const auto& cur_state_texture_unit = cur_state.texture_units[i];
@@ -217,28 +406,29 @@ void OpenGLState::Apply() const {
glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
}
}
+}
- // Samplers
- {
- bool has_delta{};
- std::size_t first{}, last{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
- for (std::size_t i = 0; i < std::size(samplers); ++i) {
- samplers[i] = texture_units[i].sampler;
- if (samplers[i] != cur_state.texture_units[i].sampler) {
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+void OpenGLState::ApplySamplers() const {
+ bool has_delta{};
+ std::size_t first{}, last{};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+ for (std::size_t i = 0; i < std::size(samplers); ++i) {
+ samplers[i] = texture_units[i].sampler;
+ if (samplers[i] != cur_state.texture_units[i].sampler) {
+ if (!has_delta) {
+ first = i;
+ has_delta = true;
}
+ last = i;
}
- if (has_delta) {
- glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- samplers.data());
- }
}
+ if (has_delta) {
+ glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
+ samplers.data());
+ }
+}
+void OpenGLState::ApplyFramebufferState() const {
// Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@@ -246,7 +436,9 @@ void OpenGLState::Apply() const {
if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
}
+}
+void OpenGLState::ApplyVertexBufferState() const {
// Vertex array
if (draw.vertex_array != cur_state.draw.vertex_array) {
glBindVertexArray(draw.vertex_array);
@@ -256,7 +448,11 @@ void OpenGLState::Apply() const {
if (draw.vertex_buffer != cur_state.draw.vertex_buffer) {
glBindBuffer(GL_ARRAY_BUFFER, draw.vertex_buffer);
}
+}
+void OpenGLState::Apply() const {
+ ApplyFramebufferState();
+ ApplyVertexBufferState();
// Uniform buffer
if (draw.uniform_buffer != cur_state.draw.uniform_buffer) {
glBindBuffer(GL_UNIFORM_BUFFER, draw.uniform_buffer);
@@ -271,27 +467,6 @@ void OpenGLState::Apply() const {
if (draw.program_pipeline != cur_state.draw.program_pipeline) {
glBindProgramPipeline(draw.program_pipeline);
}
-
- // Scissor test
- if (scissor.enabled != cur_state.scissor.enabled) {
- if (scissor.enabled) {
- glEnable(GL_SCISSOR_TEST);
- } else {
- glDisable(GL_SCISSOR_TEST);
- }
- }
-
- if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y ||
- scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) {
- glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
- }
-
- if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y ||
- viewport.width != cur_state.viewport.width ||
- viewport.height != cur_state.viewport.height) {
- glViewport(viewport.x, viewport.y, viewport.width, viewport.height);
- }
-
// Clip distance
for (std::size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) {
@@ -302,12 +477,22 @@ void OpenGLState::Apply() const {
}
}
}
-
// Point
if (point.size != cur_state.point.size) {
glPointSize(point.size);
}
-
+ ApplyColorMask();
+ ApplyViewport();
+ ApplyScissor();
+ ApplyStencilTest();
+ ApplySRgb();
+ ApplyCulling();
+ ApplyDepth();
+ ApplyPrimitiveRestart();
+ ApplyBlending();
+ ApplyLogicOp();
+ ApplyTextures();
+ ApplySamplers();
cur_state = *this;
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index dc21a2ee3..e5d1baae6 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -36,6 +36,10 @@ constexpr TextureUnit ProcTexDiffLUT{9};
class OpenGLState {
public:
struct {
+ bool enabled; // GL_FRAMEBUFFER_SRGB
+ } framebuffer_srgb;
+
+ struct {
bool enabled; // GL_CULL_FACE
GLenum mode; // GL_CULL_FACE_MODE
GLenum front_face; // GL_FRONT_FACE
@@ -48,12 +52,18 @@ public:
} depth;
struct {
+ bool enabled;
+ GLuint index;
+ } primitive_restart; // GL_PRIMITIVE_RESTART
+
+ struct ColorMask {
GLboolean red_enabled;
GLboolean green_enabled;
GLboolean blue_enabled;
GLboolean alpha_enabled;
- } color_mask; // GL_COLOR_WRITEMASK
-
+ };
+ std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
+ color_mask; // GL_COLOR_WRITEMASK
struct {
bool test_enabled; // GL_STENCIL_TEST
struct {
@@ -67,22 +77,28 @@ public:
} front, back;
} stencil;
- struct {
+ struct Blend {
bool enabled; // GL_BLEND
+ bool separate_alpha; // Independent blend enabled
GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func; // GL_BLEND_DST_RGB
GLenum src_a_func; // GL_BLEND_SRC_ALPHA
GLenum dst_a_func; // GL_BLEND_DST_ALPHA
+ };
+ std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
- struct {
- GLclampf red;
- GLclampf green;
- GLclampf blue;
- GLclampf alpha;
- } color; // GL_BLEND_COLOR
- } blend;
+ struct {
+ bool enabled;
+ } independant_blend;
+
+ struct {
+ GLclampf red;
+ GLclampf green;
+ GLclampf blue;
+ GLclampf alpha;
+ } blend_color; // GL_BLEND_COLOR
struct {
bool enabled; // GL_LOGIC_OP_MODE
@@ -127,6 +143,16 @@ public:
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
} draw;
+ struct viewport {
+ GLfloat x;
+ GLfloat y;
+ GLfloat width;
+ GLfloat height;
+ GLfloat depth_range_near; // GL_DEPTH_RANGE
+ GLfloat depth_range_far; // GL_DEPTH_RANGE
+ };
+ std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> viewports;
+
struct {
bool enabled; // GL_SCISSOR_TEST
GLint x;
@@ -136,13 +162,6 @@ public:
} scissor;
struct {
- GLint x;
- GLint y;
- GLsizei width;
- GLsizei height;
- } viewport;
-
- struct {
float size; // GL_POINT_SIZE
} point;
@@ -154,10 +173,20 @@ public:
static OpenGLState GetCurState() {
return cur_state;
}
-
+ static bool GetsRGBUsed() {
+ return s_rgb_used;
+ }
+ static void ClearsRGBUsed() {
+ s_rgb_used = false;
+ }
/// Apply this state as the current OpenGL state
void Apply() const;
-
+ /// Apply only the state afecting the framebuffer
+ void ApplyFramebufferState() const;
+ /// Apply only the state afecting the vertex buffer
+ void ApplyVertexBufferState() const;
+ /// Set the initial OpenGL state
+ static void ApplyDefaultState();
/// Resets any references to the given resource
OpenGLState& UnbindTexture(GLuint handle);
OpenGLState& ResetSampler(GLuint handle);
@@ -169,6 +198,23 @@ public:
private:
static OpenGLState cur_state;
+ // Workaround for sRGB problems caused by
+ // QT not supporting srgb output
+ static bool s_rgb_used;
+ void ApplySRgb() const;
+ void ApplyCulling() const;
+ void ApplyColorMask() const;
+ void ApplyDepth() const;
+ void ApplyPrimitiveRestart() const;
+ void ApplyStencilTest() const;
+ void ApplyViewport() const;
+ void ApplyTargetBlending(std::size_t target, bool force) const;
+ void ApplyGlobalBlending() const;
+ void ApplyBlending() const;
+ void ApplyLogicOp() const;
+ void ApplyTextures() const;
+ void ApplySamplers() const;
+ void ApplyScissor() const;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e409228cc..b97b895a4 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -6,9 +6,13 @@
#include <vector>
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
+MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
+ MP_RGB(128, 128, 192));
+
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
@@ -75,6 +79,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
}
if (invalidate || !persistent) {
+ MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 0f6dcab2b..3ce2cc6d2 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -135,17 +135,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return {};
}
-inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) {
+inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
+ Tegra::Texture::TextureMipmapFilter mip_filter_mode) {
switch (filter_mode) {
- case Tegra::Texture::TextureFilter::Linear:
- return GL_LINEAR;
- case Tegra::Texture::TextureFilter::Nearest:
- return GL_NEAREST;
+ case Tegra::Texture::TextureFilter::Linear: {
+ switch (mip_filter_mode) {
+ case Tegra::Texture::TextureMipmapFilter::None:
+ return GL_LINEAR;
+ case Tegra::Texture::TextureMipmapFilter::Nearest:
+ return GL_NEAREST_MIPMAP_LINEAR;
+ case Tegra::Texture::TextureMipmapFilter::Linear:
+ return GL_LINEAR_MIPMAP_LINEAR;
+ }
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
- static_cast<u32>(filter_mode));
- UNREACHABLE();
- return {};
+ case Tegra::Texture::TextureFilter::Nearest: {
+ switch (mip_filter_mode) {
+ case Tegra::Texture::TextureMipmapFilter::None:
+ return GL_NEAREST;
+ case Tegra::Texture::TextureMipmapFilter::Nearest:
+ return GL_NEAREST_MIPMAP_NEAREST;
+ case Tegra::Texture::TextureMipmapFilter::Linear:
+ return GL_LINEAR_MIPMAP_NEAREST;
+ }
+ }
+ }
+ LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode));
+ return GL_LINEAR;
}
inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
@@ -166,9 +181,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return GL_MIRROR_CLAMP_TO_EDGE;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
+ return GL_REPEAT;
}
inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
@@ -190,10 +204,9 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
case Tegra::Texture::DepthCompareFunc::Always:
return GL_ALWAYS;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented texture depth compare function ={}",
- static_cast<u32>(func));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}",
+ static_cast<u32>(func));
+ return GL_GREATER;
}
inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
@@ -209,9 +222,8 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
case Maxwell::Blend::Equation::Max:
return GL_MAX;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+ return GL_FUNC_ADD;
}
inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
@@ -274,9 +286,8 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
return GL_ONE_MINUS_CONSTANT_ALPHA;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
+ return GL_ZERO;
}
inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
@@ -295,9 +306,8 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
case Tegra::Texture::SwizzleSource::OneFloat:
return GL_ONE;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+ return GL_ZERO;
}
inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
@@ -327,33 +337,39 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
case Maxwell::ComparisonOp::AlwaysOld:
return GL_ALWAYS;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison));
+ return GL_ALWAYS;
}
inline GLenum StencilOp(Maxwell::StencilOp stencil) {
switch (stencil) {
case Maxwell::StencilOp::Keep:
+ case Maxwell::StencilOp::KeepOGL:
return GL_KEEP;
case Maxwell::StencilOp::Zero:
+ case Maxwell::StencilOp::ZeroOGL:
return GL_ZERO;
case Maxwell::StencilOp::Replace:
+ case Maxwell::StencilOp::ReplaceOGL:
return GL_REPLACE;
case Maxwell::StencilOp::Incr:
+ case Maxwell::StencilOp::IncrOGL:
return GL_INCR;
case Maxwell::StencilOp::Decr:
+ case Maxwell::StencilOp::DecrOGL:
return GL_DECR;
case Maxwell::StencilOp::Invert:
+ case Maxwell::StencilOp::InvertOGL:
return GL_INVERT;
case Maxwell::StencilOp::IncrWrap:
+ case Maxwell::StencilOp::IncrWrapOGL:
return GL_INCR_WRAP;
case Maxwell::StencilOp::DecrWrap:
+ case Maxwell::StencilOp::DecrWrapOGL:
return GL_DECR_WRAP;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil));
+ return GL_KEEP;
}
inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) {
@@ -363,9 +379,8 @@ inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) {
case Maxwell::Cull::FrontFace::CounterClockWise:
return GL_CCW;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
+ return GL_CCW;
}
inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) {
@@ -377,9 +392,8 @@ inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) {
case Maxwell::Cull::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
+ return GL_BACK;
}
inline GLenum LogicOp(Maxwell::LogicOperation operation) {
@@ -417,9 +431,8 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
case Maxwell::LogicOperation::Set:
return GL_SET;
}
- LOG_CRITICAL(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
- UNREACHABLE();
- return {};
+ LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation));
+ return GL_COPY;
}
} // namespace MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 96d916b07..ea38da932 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -115,7 +115,8 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window)
RendererOpenGL::~RendererOpenGL() = default;
/// Swap buffers (render frame)
-void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) {
+void RendererOpenGL::SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
ScopeAcquireGLContext acquire_context{render_window};
Core::System::GetInstance().GetPerfStats().EndSystemFrame();
@@ -124,11 +125,11 @@ void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&
OpenGLState prev_state = OpenGLState::GetCurState();
state.Apply();
- if (framebuffer != boost::none) {
+ if (framebuffer) {
// If framebuffer is provided, reload it from memory to a texture
- if (screen_info.texture.width != (GLsizei)framebuffer->width ||
- screen_info.texture.height != (GLsizei)framebuffer->height ||
- screen_info.texture.pixel_format != framebuffer->pixel_format) {
+ if (screen_info.texture.width != (GLsizei)framebuffer->get().width ||
+ screen_info.texture.height != (GLsizei)framebuffer->get().height ||
+ screen_info.texture.pixel_format != framebuffer->get().pixel_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
@@ -283,7 +284,8 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
-
+ // Initialize sRGB Usage
+ OpenGLState::ClearsRGBUsed();
rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info);
}
@@ -356,13 +358,20 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
state.texture_units[0].texture = screen_info.display_texture;
state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+ // Workaround brigthness problems in SMO by enabling sRGB in the final output
+ // if it has been used in the frame
+ // Needed because of this bug in QT
+ // QTBUG-50987
+ state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
state.Apply();
-
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
-
+ // restore default state
+ state.framebuffer_srgb.enabled = false;
state.texture_units[0].texture = 0;
state.Apply();
+ // Clear sRGB state for the next frame
+ OpenGLState::ClearsRGBUsed();
}
/**
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 961467a62..c0868c0e4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -51,7 +51,8 @@ public:
~RendererOpenGL() override;
/// Swap buffers (render frame)
- void SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) override;
+ void SwapBuffers(
+ std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
/// Initialize the renderer
bool Init() override;
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
new file mode 100644
index 000000000..d84634cb3
--- /dev/null
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -0,0 +1,38 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+#include <fmt/format.h>
+#include <glad/glad.h>
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/utils.h"
+
+namespace OpenGL {
+
+void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info) {
+ if (!GLAD_GL_KHR_debug) {
+ return; // We don't need to throw an error as this is just for debugging
+ }
+ const std::string nice_addr = fmt::format("0x{:016x}", addr);
+ std::string object_label;
+
+ if (extra_info.empty()) {
+ switch (identifier) {
+ case GL_TEXTURE:
+ object_label = "Texture@" + nice_addr;
+ break;
+ case GL_PROGRAM:
+ object_label = "Shader@" + nice_addr;
+ break;
+ default:
+ object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
+ break;
+ }
+ } else {
+ object_label = extra_info + '@' + nice_addr;
+ }
+ glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
+}
+
+} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
new file mode 100644
index 000000000..1fcb6fc11
--- /dev/null
+++ b/src/video_core/renderer_opengl/utils.h
@@ -0,0 +1,15 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <glad/glad.h>
+#include "common/common_types.h"
+
+namespace OpenGL {
+
+void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string extra_info = "");
+
+} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
new file mode 100644
index 000000000..9582dd2ca
--- /dev/null
+++ b/src/video_core/surface.cpp
@@ -0,0 +1,490 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/common_types.h"
+#include "common/math_util.h"
+#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+
+SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) {
+ switch (texture_type) {
+ case Tegra::Texture::TextureType::Texture1D:
+ return SurfaceTarget::Texture1D;
+ case Tegra::Texture::TextureType::Texture2D:
+ case Tegra::Texture::TextureType::Texture2DNoMipmap:
+ return SurfaceTarget::Texture2D;
+ case Tegra::Texture::TextureType::Texture3D:
+ return SurfaceTarget::Texture3D;
+ case Tegra::Texture::TextureType::TextureCubemap:
+ return SurfaceTarget::TextureCubemap;
+ case Tegra::Texture::TextureType::TextureCubeArray:
+ return SurfaceTarget::TextureCubeArray;
+ case Tegra::Texture::TextureType::Texture1DArray:
+ return SurfaceTarget::Texture1DArray;
+ case Tegra::Texture::TextureType::Texture2DArray:
+ return SurfaceTarget::Texture2DArray;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
+ UNREACHABLE();
+ return SurfaceTarget::Texture2D;
+ }
+}
+
+bool SurfaceTargetIsLayered(SurfaceTarget target) {
+ switch (target) {
+ case SurfaceTarget::Texture1D:
+ case SurfaceTarget::Texture2D:
+ case SurfaceTarget::Texture3D:
+ return false;
+ case SurfaceTarget::Texture1DArray:
+ case SurfaceTarget::Texture2DArray:
+ case SurfaceTarget::TextureCubemap:
+ case SurfaceTarget::TextureCubeArray:
+ return true;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
+ UNREACHABLE();
+ return false;
+ }
+}
+
+PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) {
+ switch (format) {
+ case Tegra::DepthFormat::S8_Z24_UNORM:
+ return PixelFormat::S8Z24;
+ case Tegra::DepthFormat::Z24_S8_UNORM:
+ return PixelFormat::Z24S8;
+ case Tegra::DepthFormat::Z32_FLOAT:
+ return PixelFormat::Z32F;
+ case Tegra::DepthFormat::Z16_UNORM:
+ return PixelFormat::Z16;
+ case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
+ return PixelFormat::Z32FS8;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+}
+
+PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
+ switch (format) {
+ // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the
+ // gamma.
+ case Tegra::RenderTargetFormat::RGBA8_SRGB:
+ return PixelFormat::RGBA8_SRGB;
+ case Tegra::RenderTargetFormat::RGBA8_UNORM:
+ return PixelFormat::ABGR8U;
+ case Tegra::RenderTargetFormat::RGBA8_SNORM:
+ return PixelFormat::ABGR8S;
+ case Tegra::RenderTargetFormat::RGBA8_UINT:
+ return PixelFormat::ABGR8UI;
+ case Tegra::RenderTargetFormat::BGRA8_SRGB:
+ return PixelFormat::BGRA8_SRGB;
+ case Tegra::RenderTargetFormat::BGRA8_UNORM:
+ return PixelFormat::BGRA8;
+ case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
+ return PixelFormat::A2B10G10R10U;
+ case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+ return PixelFormat::RGBA16F;
+ case Tegra::RenderTargetFormat::RGBA16_UNORM:
+ return PixelFormat::RGBA16U;
+ case Tegra::RenderTargetFormat::RGBA16_UINT:
+ return PixelFormat::RGBA16UI;
+ case Tegra::RenderTargetFormat::RGBA32_FLOAT:
+ return PixelFormat::RGBA32F;
+ case Tegra::RenderTargetFormat::RG32_FLOAT:
+ return PixelFormat::RG32F;
+ case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
+ return PixelFormat::R11FG11FB10F;
+ case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+ return PixelFormat::B5G6R5U;
+ case Tegra::RenderTargetFormat::BGR5A1_UNORM:
+ return PixelFormat::A1B5G5R5U;
+ case Tegra::RenderTargetFormat::RGBA32_UINT:
+ return PixelFormat::RGBA32UI;
+ case Tegra::RenderTargetFormat::R8_UNORM:
+ return PixelFormat::R8U;
+ case Tegra::RenderTargetFormat::R8_UINT:
+ return PixelFormat::R8UI;
+ case Tegra::RenderTargetFormat::RG16_FLOAT:
+ return PixelFormat::RG16F;
+ case Tegra::RenderTargetFormat::RG16_UINT:
+ return PixelFormat::RG16UI;
+ case Tegra::RenderTargetFormat::RG16_SINT:
+ return PixelFormat::RG16I;
+ case Tegra::RenderTargetFormat::RG16_UNORM:
+ return PixelFormat::RG16;
+ case Tegra::RenderTargetFormat::RG16_SNORM:
+ return PixelFormat::RG16S;
+ case Tegra::RenderTargetFormat::RG8_UNORM:
+ return PixelFormat::RG8U;
+ case Tegra::RenderTargetFormat::RG8_SNORM:
+ return PixelFormat::RG8S;
+ case Tegra::RenderTargetFormat::R16_FLOAT:
+ return PixelFormat::R16F;
+ case Tegra::RenderTargetFormat::R16_UNORM:
+ return PixelFormat::R16U;
+ case Tegra::RenderTargetFormat::R16_SNORM:
+ return PixelFormat::R16S;
+ case Tegra::RenderTargetFormat::R16_UINT:
+ return PixelFormat::R16UI;
+ case Tegra::RenderTargetFormat::R16_SINT:
+ return PixelFormat::R16I;
+ case Tegra::RenderTargetFormat::R32_FLOAT:
+ return PixelFormat::R32F;
+ case Tegra::RenderTargetFormat::R32_UINT:
+ return PixelFormat::R32UI;
+ case Tegra::RenderTargetFormat::RG32_UINT:
+ return PixelFormat::RG32UI;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+}
+
+PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
+ Tegra::Texture::ComponentType component_type,
+ bool is_srgb) {
+ // TODO(Subv): Properly implement this
+ switch (format) {
+ case Tegra::Texture::TextureFormat::A8R8G8B8:
+ if (is_srgb) {
+ return PixelFormat::RGBA8_SRGB;
+ }
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::ABGR8U;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::ABGR8S;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::ABGR8UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::B5G6R5:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::B5G6R5U;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::A2B10G10R10:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::A2B10G10R10U;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::A1B5G5R5:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::A1B5G5R5U;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::R8:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::R8U;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::R8UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::G8R8:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::G8R8U;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::G8R8S;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::RGBA16U;
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RGBA16F;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::BF10GF11RF11:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::R11FG11FB10F;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RGBA32F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::RGBA32UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::R32_G32:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RG32F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::RG32UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::R32_G32_B32:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RGB32F;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::R16:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::R16F;
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::R16U;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::R16S;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::R16UI;
+ case Tegra::Texture::ComponentType::SINT:
+ return PixelFormat::R16I;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::R32:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::R32F;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::R32UI;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::ZF32:
+ return PixelFormat::Z32F;
+ case Tegra::Texture::TextureFormat::Z16:
+ return PixelFormat::Z16;
+ case Tegra::Texture::TextureFormat::Z24S8:
+ return PixelFormat::Z24S8;
+ case Tegra::Texture::TextureFormat::DXT1:
+ return is_srgb ? PixelFormat::DXT1_SRGB : PixelFormat::DXT1;
+ case Tegra::Texture::TextureFormat::DXT23:
+ return is_srgb ? PixelFormat::DXT23_SRGB : PixelFormat::DXT23;
+ case Tegra::Texture::TextureFormat::DXT45:
+ return is_srgb ? PixelFormat::DXT45_SRGB : PixelFormat::DXT45;
+ case Tegra::Texture::TextureFormat::DXN1:
+ return PixelFormat::DXN1;
+ case Tegra::Texture::TextureFormat::DXN2:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::DXN2UNORM;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::DXN2SNORM;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ case Tegra::Texture::TextureFormat::BC7U:
+ return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
+ case Tegra::Texture::TextureFormat::BC6H_UF16:
+ return PixelFormat::BC6H_UF16;
+ case Tegra::Texture::TextureFormat::BC6H_SF16:
+ return PixelFormat::BC6H_SF16;
+ case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
+ return is_srgb ? PixelFormat::ASTC_2D_4X4_SRGB : PixelFormat::ASTC_2D_4X4;
+ case Tegra::Texture::TextureFormat::ASTC_2D_5X4:
+ return is_srgb ? PixelFormat::ASTC_2D_5X4_SRGB : PixelFormat::ASTC_2D_5X4;
+ case Tegra::Texture::TextureFormat::ASTC_2D_5X5:
+ return is_srgb ? PixelFormat::ASTC_2D_5X5_SRGB : PixelFormat::ASTC_2D_5X5;
+ case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
+ return is_srgb ? PixelFormat::ASTC_2D_8X8_SRGB : PixelFormat::ASTC_2D_8X8;
+ case Tegra::Texture::TextureFormat::ASTC_2D_8X5:
+ return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5;
+ case Tegra::Texture::TextureFormat::ASTC_2D_10X8:
+ return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8;
+ case Tegra::Texture::TextureFormat::R16_G16:
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::FLOAT:
+ return PixelFormat::RG16F;
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::RG16;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::RG16S;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::RG16UI;
+ case Tegra::Texture::ComponentType::SINT:
+ return PixelFormat::RG16I;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type));
+ UNREACHABLE();
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
+ static_cast<u32>(component_type));
+ UNREACHABLE();
+ }
+}
+
+ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
+ // TODO(Subv): Implement more component types
+ switch (type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return ComponentType::UNorm;
+ case Tegra::Texture::ComponentType::FLOAT:
+ return ComponentType::Float;
+ case Tegra::Texture::ComponentType::SNORM:
+ return ComponentType::SNorm;
+ case Tegra::Texture::ComponentType::UINT:
+ return ComponentType::UInt;
+ case Tegra::Texture::ComponentType::SINT:
+ return ComponentType::SInt;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
+ UNREACHABLE();
+ }
+}
+
+ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
+ // TODO(Subv): Implement more render targets
+ switch (format) {
+ case Tegra::RenderTargetFormat::RGBA8_UNORM:
+ case Tegra::RenderTargetFormat::RGBA8_SRGB:
+ case Tegra::RenderTargetFormat::BGRA8_UNORM:
+ case Tegra::RenderTargetFormat::BGRA8_SRGB:
+ case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
+ case Tegra::RenderTargetFormat::R8_UNORM:
+ case Tegra::RenderTargetFormat::RG16_UNORM:
+ case Tegra::RenderTargetFormat::R16_UNORM:
+ case Tegra::RenderTargetFormat::B5G6R5_UNORM:
+ case Tegra::RenderTargetFormat::BGR5A1_UNORM:
+ case Tegra::RenderTargetFormat::RG8_UNORM:
+ case Tegra::RenderTargetFormat::RGBA16_UNORM:
+ return ComponentType::UNorm;
+ case Tegra::RenderTargetFormat::RGBA8_SNORM:
+ case Tegra::RenderTargetFormat::RG16_SNORM:
+ case Tegra::RenderTargetFormat::R16_SNORM:
+ case Tegra::RenderTargetFormat::RG8_SNORM:
+ return ComponentType::SNorm;
+ case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+ case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
+ case Tegra::RenderTargetFormat::RGBA32_FLOAT:
+ case Tegra::RenderTargetFormat::RG32_FLOAT:
+ case Tegra::RenderTargetFormat::RG16_FLOAT:
+ case Tegra::RenderTargetFormat::R16_FLOAT:
+ case Tegra::RenderTargetFormat::R32_FLOAT:
+ return ComponentType::Float;
+ case Tegra::RenderTargetFormat::RGBA32_UINT:
+ case Tegra::RenderTargetFormat::RGBA16_UINT:
+ case Tegra::RenderTargetFormat::RG16_UINT:
+ case Tegra::RenderTargetFormat::R8_UINT:
+ case Tegra::RenderTargetFormat::R16_UINT:
+ case Tegra::RenderTargetFormat::RG32_UINT:
+ case Tegra::RenderTargetFormat::R32_UINT:
+ case Tegra::RenderTargetFormat::RGBA8_UINT:
+ return ComponentType::UInt;
+ case Tegra::RenderTargetFormat::RG16_SINT:
+ case Tegra::RenderTargetFormat::R16_SINT:
+ return ComponentType::SInt;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+}
+
+PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
+ switch (format) {
+ case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+ return PixelFormat::ABGR8U;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+}
+
+ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
+ switch (format) {
+ case Tegra::DepthFormat::Z16_UNORM:
+ case Tegra::DepthFormat::S8_Z24_UNORM:
+ case Tegra::DepthFormat::Z24_S8_UNORM:
+ return ComponentType::UNorm;
+ case Tegra::DepthFormat::Z32_FLOAT:
+ case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
+ return ComponentType::Float;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+}
+
+SurfaceType GetFormatType(PixelFormat pixel_format) {
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
+ return SurfaceType::ColorTexture;
+ }
+
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) {
+ return SurfaceType::Depth;
+ }
+
+ if (static_cast<std::size_t>(pixel_format) <
+ static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) {
+ return SurfaceType::DepthStencil;
+ }
+
+ // TODO(Subv): Implement the other formats
+ ASSERT(false);
+
+ return SurfaceType::Invalid;
+}
+
+bool IsPixelFormatASTC(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::ASTC_2D_4X4:
+ case PixelFormat::ASTC_2D_5X4:
+ case PixelFormat::ASTC_2D_5X5:
+ case PixelFormat::ASTC_2D_8X8:
+ case PixelFormat::ASTC_2D_8X5:
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8:
+ case PixelFormat::ASTC_2D_10X8_SRGB:
+ return true;
+ default:
+ return false;
+ }
+}
+
+std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
+ return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
+}
+
+bool IsFormatBCn(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::DXT1:
+ case PixelFormat::DXT23:
+ case PixelFormat::DXT45:
+ case PixelFormat::DXN1:
+ case PixelFormat::DXN2SNORM:
+ case PixelFormat::DXN2UNORM:
+ case PixelFormat::BC7U:
+ case PixelFormat::BC6H_UF16:
+ case PixelFormat::BC6H_SF16:
+ case PixelFormat::DXT1_SRGB:
+ case PixelFormat::DXT23_SRGB:
+ case PixelFormat::DXT45_SRGB:
+ case PixelFormat::BC7U_SRGB:
+ return true;
+ }
+ return false;
+}
+
+} // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
new file mode 100644
index 000000000..0dd3eb2e4
--- /dev/null
+++ b/src/video_core/surface.h
@@ -0,0 +1,477 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <climits>
+#include <utility>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/gpu.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCore::Surface {
+
+enum class PixelFormat {
+ ABGR8U = 0,
+ ABGR8S = 1,
+ ABGR8UI = 2,
+ B5G6R5U = 3,
+ A2B10G10R10U = 4,
+ A1B5G5R5U = 5,
+ R8U = 6,
+ R8UI = 7,
+ RGBA16F = 8,
+ RGBA16U = 9,
+ RGBA16UI = 10,
+ R11FG11FB10F = 11,
+ RGBA32UI = 12,
+ DXT1 = 13,
+ DXT23 = 14,
+ DXT45 = 15,
+ DXN1 = 16, // This is also known as BC4
+ DXN2UNORM = 17,
+ DXN2SNORM = 18,
+ BC7U = 19,
+ BC6H_UF16 = 20,
+ BC6H_SF16 = 21,
+ ASTC_2D_4X4 = 22,
+ G8R8U = 23,
+ G8R8S = 24,
+ BGRA8 = 25,
+ RGBA32F = 26,
+ RG32F = 27,
+ R32F = 28,
+ R16F = 29,
+ R16U = 30,
+ R16S = 31,
+ R16UI = 32,
+ R16I = 33,
+ RG16 = 34,
+ RG16F = 35,
+ RG16UI = 36,
+ RG16I = 37,
+ RG16S = 38,
+ RGB32F = 39,
+ RGBA8_SRGB = 40,
+ RG8U = 41,
+ RG8S = 42,
+ RG32UI = 43,
+ R32UI = 44,
+ ASTC_2D_8X8 = 45,
+ ASTC_2D_8X5 = 46,
+ ASTC_2D_5X4 = 47,
+ BGRA8_SRGB = 48,
+ DXT1_SRGB = 49,
+ DXT23_SRGB = 50,
+ DXT45_SRGB = 51,
+ BC7U_SRGB = 52,
+ ASTC_2D_4X4_SRGB = 53,
+ ASTC_2D_8X8_SRGB = 54,
+ ASTC_2D_8X5_SRGB = 55,
+ ASTC_2D_5X4_SRGB = 56,
+ ASTC_2D_5X5 = 57,
+ ASTC_2D_5X5_SRGB = 58,
+ ASTC_2D_10X8 = 59,
+ ASTC_2D_10X8_SRGB = 60,
+
+ MaxColorFormat,
+
+ // Depth formats
+ Z32F = 61,
+ Z16 = 62,
+
+ MaxDepthFormat,
+
+ // DepthStencil formats
+ Z24S8 = 63,
+ S8Z24 = 64,
+ Z32FS8 = 65,
+
+ MaxDepthStencilFormat,
+
+ Max = MaxDepthStencilFormat,
+ Invalid = 255,
+};
+
+static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
+
+enum class ComponentType {
+ Invalid = 0,
+ SNorm = 1,
+ UNorm = 2,
+ SInt = 3,
+ UInt = 4,
+ Float = 5,
+};
+
+enum class SurfaceType {
+ ColorTexture = 0,
+ Depth = 1,
+ DepthStencil = 2,
+ Fill = 3,
+ Invalid = 4,
+};
+
+enum class SurfaceTarget {
+ Texture1D,
+ Texture2D,
+ Texture3D,
+ Texture1DArray,
+ Texture2DArray,
+ TextureCubemap,
+ TextureCubeArray,
+};
+
+/**
+ * Gets the compression factor for the specified PixelFormat. This applies to just the
+ * "compressed width" and "compressed height", not the overall compression factor of a
+ * compressed image. This is used for maintaining proper surface sizes for compressed
+ * texture formats.
+ */
+static constexpr u32 GetCompressionFactor(PixelFormat format) {
+ if (format == PixelFormat::Invalid)
+ return 0;
+
+ constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // G8R8U
+ 1, // G8R8S
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
+ 4, // ASTC_2D_8X8
+ 4, // ASTC_2D_8X5
+ 4, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 4, // ASTC_2D_4X4_SRGB
+ 4, // ASTC_2D_8X8_SRGB
+ 4, // ASTC_2D_8X5_SRGB
+ 4, // ASTC_2D_5X4_SRGB
+ 4, // ASTC_2D_5X5
+ 4, // ASTC_2D_5X5_SRGB
+ 4, // ASTC_2D_10X8
+ 4, // ASTC_2D_10X8_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
+ }};
+
+ ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size());
+ return compression_factor_table[static_cast<std::size_t>(format)];
+}
+
+static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
+ if (format == PixelFormat::Invalid)
+ return 0;
+ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // G8R8U
+ 1, // G8R8S
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
+ 8, // ASTC_2D_8X8
+ 8, // ASTC_2D_8X5
+ 5, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 4, // ASTC_2D_4X4_SRGB
+ 8, // ASTC_2D_8X8_SRGB
+ 8, // ASTC_2D_8X5_SRGB
+ 5, // ASTC_2D_5X4_SRGB
+ 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_SRGB
+ 10, // ASTC_2D_10X8
+ 10, // ASTC_2D_10X8_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
+ }};
+ ASSERT(static_cast<std::size_t>(format) < block_width_table.size());
+ return block_width_table[static_cast<std::size_t>(format)];
+}
+
+static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
+ if (format == PixelFormat::Invalid)
+ return 0;
+
+ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
+ 1, // ABGR8U
+ 1, // ABGR8S
+ 1, // ABGR8UI
+ 1, // B5G6R5U
+ 1, // A2B10G10R10U
+ 1, // A1B5G5R5U
+ 1, // R8U
+ 1, // R8UI
+ 1, // RGBA16F
+ 1, // RGBA16U
+ 1, // RGBA16UI
+ 1, // R11FG11FB10F
+ 1, // RGBA32UI
+ 4, // DXT1
+ 4, // DXT23
+ 4, // DXT45
+ 4, // DXN1
+ 4, // DXN2UNORM
+ 4, // DXN2SNORM
+ 4, // BC7U
+ 4, // BC6H_UF16
+ 4, // BC6H_SF16
+ 4, // ASTC_2D_4X4
+ 1, // G8R8U
+ 1, // G8R8S
+ 1, // BGRA8
+ 1, // RGBA32F
+ 1, // RG32F
+ 1, // R32F
+ 1, // R16F
+ 1, // R16U
+ 1, // R16S
+ 1, // R16UI
+ 1, // R16I
+ 1, // RG16
+ 1, // RG16F
+ 1, // RG16UI
+ 1, // RG16I
+ 1, // RG16S
+ 1, // RGB32F
+ 1, // RGBA8_SRGB
+ 1, // RG8U
+ 1, // RG8S
+ 1, // RG32UI
+ 1, // R32UI
+ 8, // ASTC_2D_8X8
+ 5, // ASTC_2D_8X5
+ 4, // ASTC_2D_5X4
+ 1, // BGRA8_SRGB
+ 4, // DXT1_SRGB
+ 4, // DXT23_SRGB
+ 4, // DXT45_SRGB
+ 4, // BC7U_SRGB
+ 4, // ASTC_2D_4X4_SRGB
+ 8, // ASTC_2D_8X8_SRGB
+ 5, // ASTC_2D_8X5_SRGB
+ 4, // ASTC_2D_5X4_SRGB
+ 5, // ASTC_2D_5X5
+ 5, // ASTC_2D_5X5_SRGB
+ 8, // ASTC_2D_10X8
+ 8, // ASTC_2D_10X8_SRGB
+ 1, // Z32F
+ 1, // Z16
+ 1, // Z24S8
+ 1, // S8Z24
+ 1, // Z32FS8
+ }};
+
+ ASSERT(static_cast<std::size_t>(format) < block_height_table.size());
+ return block_height_table[static_cast<std::size_t>(format)];
+}
+
+static constexpr u32 GetFormatBpp(PixelFormat format) {
+ if (format == PixelFormat::Invalid)
+ return 0;
+
+ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
+ 32, // ABGR8U
+ 32, // ABGR8S
+ 32, // ABGR8UI
+ 16, // B5G6R5U
+ 32, // A2B10G10R10U
+ 16, // A1B5G5R5U
+ 8, // R8U
+ 8, // R8UI
+ 64, // RGBA16F
+ 64, // RGBA16U
+ 64, // RGBA16UI
+ 32, // R11FG11FB10F
+ 128, // RGBA32UI
+ 64, // DXT1
+ 128, // DXT23
+ 128, // DXT45
+ 64, // DXN1
+ 128, // DXN2UNORM
+ 128, // DXN2SNORM
+ 128, // BC7U
+ 128, // BC6H_UF16
+ 128, // BC6H_SF16
+ 128, // ASTC_2D_4X4
+ 16, // G8R8U
+ 16, // G8R8S
+ 32, // BGRA8
+ 128, // RGBA32F
+ 64, // RG32F
+ 32, // R32F
+ 16, // R16F
+ 16, // R16U
+ 16, // R16S
+ 16, // R16UI
+ 16, // R16I
+ 32, // RG16
+ 32, // RG16F
+ 32, // RG16UI
+ 32, // RG16I
+ 32, // RG16S
+ 96, // RGB32F
+ 32, // RGBA8_SRGB
+ 16, // RG8U
+ 16, // RG8S
+ 64, // RG32UI
+ 32, // R32UI
+ 128, // ASTC_2D_8X8
+ 128, // ASTC_2D_8X5
+ 128, // ASTC_2D_5X4
+ 32, // BGRA8_SRGB
+ 64, // DXT1_SRGB
+ 128, // DXT23_SRGB
+ 128, // DXT45_SRGB
+ 128, // BC7U
+ 128, // ASTC_2D_4X4_SRGB
+ 128, // ASTC_2D_8X8_SRGB
+ 128, // ASTC_2D_8X5_SRGB
+ 128, // ASTC_2D_5X4_SRGB
+ 128, // ASTC_2D_5X5
+ 128, // ASTC_2D_5X5_SRGB
+ 128, // ASTC_2D_10X8
+ 128, // ASTC_2D_10X8_SRGB
+ 32, // Z32F
+ 16, // Z16
+ 32, // Z24S8
+ 32, // S8Z24
+ 64, // Z32FS8
+ }};
+
+ ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
+ return bpp_table[static_cast<std::size_t>(format)];
+}
+
+/// Returns the sizer in bytes of the specified pixel format
+static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
+ if (pixel_format == PixelFormat::Invalid) {
+ return 0;
+ }
+ return GetFormatBpp(pixel_format) / CHAR_BIT;
+}
+
+SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
+
+bool SurfaceTargetIsLayered(SurfaceTarget target);
+
+PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
+
+PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);
+
+PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
+ Tegra::Texture::ComponentType component_type,
+ bool is_srgb);
+
+ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type);
+
+ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format);
+
+PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format);
+
+ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format);
+
+SurfaceType GetFormatType(PixelFormat pixel_format);
+
+bool IsPixelFormatASTC(PixelFormat format);
+
+std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
+
+/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
+bool IsFormatBCn(PixelFormat format);
+
+} // namespace VideoCore::Surface
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index b1feacae9..bc50a4876 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1598,27 +1598,29 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
namespace Tegra::Texture::ASTC {
std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
- uint32_t block_width, uint32_t block_height) {
+ uint32_t depth, uint32_t block_width, uint32_t block_height) {
uint32_t blockIdx = 0;
- std::vector<uint8_t> outData(height * width * 4);
- for (uint32_t j = 0; j < height; j += block_height) {
- for (uint32_t i = 0; i < width; i += block_width) {
+ std::vector<uint8_t> outData(height * width * depth * 4);
+ for (uint32_t k = 0; k < depth; k++) {
+ for (uint32_t j = 0; j < height; j += block_height) {
+ for (uint32_t i = 0; i < width; i += block_width) {
- uint8_t* blockPtr = data.data() + blockIdx * 16;
+ uint8_t* blockPtr = data.data() + blockIdx * 16;
- // Blocks can be at most 12x12
- uint32_t uncompData[144];
- ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
+ // Blocks can be at most 12x12
+ uint32_t uncompData[144];
+ ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
- uint32_t decompWidth = std::min(block_width, width - i);
- uint32_t decompHeight = std::min(block_height, height - j);
+ uint32_t decompWidth = std::min(block_width, width - i);
+ uint32_t decompHeight = std::min(block_height, height - j);
- uint8_t* outRow = outData.data() + (j * width + i) * 4;
- for (uint32_t jj = 0; jj < decompHeight; jj++) {
- memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
- }
+ uint8_t* outRow = outData.data() + (j * width + i) * 4;
+ for (uint32_t jj = 0; jj < decompHeight; jj++) {
+ memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
+ }
- blockIdx++;
+ blockIdx++;
+ }
}
}
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index f0d7c0e56..d419dd025 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -10,6 +10,6 @@
namespace Tegra::Texture::ASTC {
std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
- uint32_t block_width, uint32_t block_height);
+ uint32_t depth, uint32_t block_width, uint32_t block_height);
} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index f1b40e7f5..c9160b467 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -45,7 +45,7 @@ constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
* Instead of going gob by gob, we map the coordinates inside a block and manage from
* those. Block_Width is assumed to be 1.
*/
-void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
+void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
const u32 y_end, const u32 z_end, const u32 tile_offset,
const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
@@ -81,7 +81,7 @@ void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unsw
* Instead of going gob by gob, we map the coordinates inside a block and manage from
* those. Block_Width is assumed to be 1.
*/
-void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
+void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
const u32 y_end, const u32 z_end, const u32 tile_offset,
const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
@@ -90,10 +90,10 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz
u32 z_address = tile_offset;
const u32 x_startb = x_start * bytes_per_pixel;
const u32 x_endb = x_end * bytes_per_pixel;
- const u32 copy_size = 16;
- const u32 gob_size_x = 64;
- const u32 gob_size_y = 8;
- const u32 gob_size_z = 1;
+ constexpr u32 copy_size = 16;
+ constexpr u32 gob_size_x = 64;
+ constexpr u32 gob_size_y = 8;
+ constexpr u32 gob_size_z = 1;
const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
for (u32 z = z_start; z < z_end; z++) {
u32 y_address = z_address;
@@ -126,24 +126,23 @@ void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizz
* https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
*/
template <bool fast>
-void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, const u32 width,
- const u32 height, const u32 depth, const u32 bytes_per_pixel,
+void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
+ const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) {
auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
const u32 stride_x = width * out_bytes_per_pixel;
const u32 layer_z = height * stride_x;
- const u32 gob_x_bytes = 64;
+ constexpr u32 gob_x_bytes = 64;
const u32 gob_elements_x = gob_x_bytes / bytes_per_pixel;
- const u32 gob_elements_y = 8;
- const u32 gob_elements_z = 1;
+ constexpr u32 gob_elements_y = 8;
+ constexpr u32 gob_elements_z = 1;
const u32 block_x_elements = gob_elements_x;
const u32 block_y_elements = gob_elements_y * block_height;
const u32 block_z_elements = gob_elements_z * block_depth;
const u32 blocks_on_x = div_ceil(width, block_x_elements);
const u32 blocks_on_y = div_ceil(height, block_y_elements);
const u32 blocks_on_z = div_ceil(depth, block_z_elements);
- const u32 blocks = blocks_on_x * blocks_on_y * blocks_on_z;
- const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z;
+ constexpr u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z;
const u32 xy_block_size = gob_size * block_height;
const u32 block_size = xy_block_size * block_depth;
u32 tile_offset = 0;
@@ -172,7 +171,7 @@ void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
}
void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
- u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
+ u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
bool unswizzle, u32 block_height, u32 block_depth) {
if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
@@ -203,6 +202,8 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::ASTC_2D_5X4:
case TextureFormat::ASTC_2D_8X8:
case TextureFormat::ASTC_2D_8X5:
+ case TextureFormat::ASTC_2D_10X8:
+ case TextureFormat::ASTC_2D_5X5:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::BF10GF11RF11:
@@ -228,12 +229,21 @@ u32 BytesPerPixel(TextureFormat format) {
}
}
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
- u32 height, u32 depth, u32 block_height, u32 block_depth) {
+void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+ u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
+ u32 block_depth) {
+ CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
+ (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
+ bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true,
+ block_height, block_depth);
+}
+
+std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
+ u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
+ u32 block_height, u32 block_depth) {
std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
- CopySwizzledData(width / tile_size, height / tile_size, depth, bytes_per_pixel, bytes_per_pixel,
- Memory::GetPointer(address), unswizzled_data.data(), true, block_height,
- block_depth);
+ UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
+ width, height, depth, block_height, block_depth);
return unswizzled_data;
}
@@ -293,6 +303,8 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::BC6H_SF16:
case TextureFormat::ASTC_2D_4X4:
case TextureFormat::ASTC_2D_8X8:
+ case TextureFormat::ASTC_2D_5X5:
+ case TextureFormat::ASTC_2D_10X8:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5:
@@ -320,13 +332,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth) {
if (tiled) {
- const u32 gobs_in_x = 64 / bytes_per_pixel;
- const u32 gobs_in_y = 8;
- const u32 gobs_in_z = 1;
- const u32 aligned_width = Common::AlignUp(width, gobs_in_x);
+ constexpr u32 gobs_in_x = 64;
+ constexpr u32 gobs_in_y = 8;
+ constexpr u32 gobs_in_z = 1;
+ const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gobs_in_x);
const u32 aligned_height = Common::AlignUp(height, gobs_in_y * block_height);
const u32 aligned_depth = Common::AlignUp(depth, gobs_in_z * block_depth);
- return aligned_width * aligned_height * aligned_depth * bytes_per_pixel;
+ return aligned_width * aligned_height * aligned_depth;
} else {
return width * height * depth * bytes_per_pixel;
}
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 4726f54a5..f4ef7c73e 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -10,11 +10,24 @@
namespace Tegra::Texture {
+// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
+// an small rect of (64/bytes_per_pixel)X8.
+inline std::size_t GetGOBSize() {
+ return 512;
+}
+
+/**
+ * Unswizzles a swizzled texture without changing its format.
+ */
+void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
+ u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
+ u32 block_height = TICEntry::DefaultBlockHeight,
+ u32 block_depth = TICEntry::DefaultBlockHeight);
/**
* Unswizzles a swizzled texture without changing its format.
*/
-std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
- u32 height, u32 depth,
+std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
+ u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
u32 block_height = TICEntry::DefaultBlockHeight,
u32 block_depth = TICEntry::DefaultBlockHeight);
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 5947bd2b9..e199d019a 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -168,19 +168,29 @@ struct TICEntry {
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
-
+ BitField<26, 1, u32> use_header_opt_control;
+ BitField<27, 1, u32> depth_texture;
BitField<28, 4, u32> max_mip_level;
};
union {
BitField<0, 16, u32> width_minus_1;
+ BitField<22, 1, u32> srgb_conversion;
BitField<23, 4, TextureType> texture_type;
+ BitField<29, 3, u32> border_size;
};
union {
BitField<0, 16, u32> height_minus_1;
BitField<16, 15, u32> depth_minus_1;
};
+ union {
+ BitField<6, 13, u32> mip_lod_bias;
+ BitField<27, 3, u32> max_anisotropy;
+ };
- INSERT_PADDING_BYTES(8);
+ union {
+ BitField<0, 4, u32> res_min_mip_level;
+ BitField<4, 4, u32> res_max_mip_level;
+ };
GPUVAddr Address() const {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
@@ -227,6 +237,10 @@ struct TICEntry {
return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey;
}
+
+ bool IsSrgbConversionEnabled() const {
+ return srgb_conversion != 0;
+ }
};
static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 237cc1307..e0a14d48f 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -161,30 +161,4 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe
}
}
-static void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr,
- std::string extra_info = "") {
- if (!GLAD_GL_KHR_debug) {
- return; // We don't need to throw an error as this is just for debugging
- }
- const std::string nice_addr = fmt::format("0x{:016x}", addr);
- std::string object_label;
-
- if (extra_info.empty()) {
- switch (identifier) {
- case GL_TEXTURE:
- object_label = "Texture@" + nice_addr;
- break;
- case GL_PROGRAM:
- object_label = "Shader@" + nice_addr;
- break;
- default:
- object_label = fmt::format("Object(0x{:x})@{}", identifier, nice_addr);
- break;
- }
- } else {
- object_label = extra_info + '@' + nice_addr;
- }
- glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
-}
-
} // namespace VideoCore