summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 4
-rw-r--r-- src/video_core/engines/shader_bytecode.h 5
-rw-r--r-- src/video_core/gpu.cpp 2
-rw-r--r-- src/video_core/memory_manager.cpp 87
-rw-r--r-- src/video_core/memory_manager.h 7
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 133
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 12
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 610
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 52
-rw-r--r-- src/video_core/shader/decode/texture.cpp 114
-rw-r--r-- src/video_core/shader/decode/xmad.cpp 39
-rw-r--r-- src/video_core/shader/shader_ir.h 12
-rw-r--r-- src/video_core/shader/track.cpp 17
17 files changed, 671 insertions, 444 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8194a4b4a..74403eed4 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -334,8 +334,8 @@ void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();
const u32 cache_flush = regs.sync_info.unknown.Value();
- UNIMPLEMENTED_MSG("Syncpoint Set {}, increment: {}, unk: {}", sync_point, increment,
- cache_flush);
+ LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
+ cache_flush);
}
void Maxwell3D::DrawArrays() {
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7f613370b..2e1e96c81 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1238,13 +1238,16 @@ union Instruction {
union {
BitField<20, 16, u64> imm20_16;
+ BitField<35, 1, u64> high_b_rr; // used on RR
BitField<36, 1, u64> product_shift_left;
BitField<37, 1, u64> merge_37;
BitField<48, 1, u64> sign_a;
BitField<49, 1, u64> sign_b;
+ BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
BitField<50, 3, XmadMode> mode;
BitField<52, 1, u64> high_b;
BitField<53, 1, u64> high_a;
+ BitField<55, 1, u64> product_shift_left_second; // used on CR
BitField<56, 1, u64> merge_56;
} xmad;
@@ -1662,7 +1665,7 @@ private:
INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
- INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+ INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 30b29e14d..4461083ff 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
auto& rasterizer{renderer.Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>();
+ memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 8417324ff..0f4e820aa 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,16 +5,13 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
-#include "core/core.h"
#include "core/memory.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
-#include "video_core/renderer_base.h"
namespace Tegra {
-MemoryManager::MemoryManager() {
+MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Common::PageType::Unmapped);
@@ -70,8 +67,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
const u64 aligned_size{Common::AlignUp(size, page_size)};
const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
- Core::System::GetInstance().Renderer().Rasterizer().FlushAndInvalidateRegion(cache_addr,
- aligned_size);
+ rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size);
return gpu_addr;
@@ -204,14 +200,85 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
}
void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const {
- std::memcpy(dest_buffer, GetPointer(src_addr), size);
+ std::size_t remaining_size{size};
+ std::size_t page_index{src_addr >> page_bits};
+ std::size_t page_offset{src_addr & page_mask};
+
+ while (remaining_size > 0) {
+ const std::size_t copy_amount{
+ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+ switch (page_table.attributes[page_index]) {
+ case Common::PageType::Memory: {
+ const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ std::memcpy(dest_buffer, src_ptr, copy_amount);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+
+ page_index++;
+ page_offset = 0;
+ dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+ remaining_size -= copy_amount;
+ }
}
+
void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
- std::memcpy(GetPointer(dest_addr), src_buffer, size);
+ std::size_t remaining_size{size};
+ std::size_t page_index{dest_addr >> page_bits};
+ std::size_t page_offset{dest_addr & page_mask};
+
+ while (remaining_size > 0) {
+ const std::size_t copy_amount{
+ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+ switch (page_table.attributes[page_index]) {
+ case Common::PageType::Memory: {
+ u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+ rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
+ std::memcpy(dest_ptr, src_buffer, copy_amount);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+
+ page_index++;
+ page_offset = 0;
+ src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+ remaining_size -= copy_amount;
+ }
}
void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
- std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
+ std::size_t remaining_size{size};
+ std::size_t page_index{src_addr >> page_bits};
+ std::size_t page_offset{src_addr & page_mask};
+
+ while (remaining_size > 0) {
+ const std::size_t copy_amount{
+ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
+
+ switch (page_table.attributes[page_index]) {
+ case Common::PageType::Memory: {
+ const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ WriteBlock(dest_addr, src_ptr, copy_amount);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+
+ page_index++;
+ page_offset = 0;
+ dest_addr += static_cast<VAddr>(copy_amount);
+ src_addr += static_cast<VAddr>(copy_amount);
+ remaining_size -= copy_amount;
+ }
}
void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
@@ -351,7 +418,7 @@ MemoryManager::VMAIter MemoryManager::CarveVMA(GPUVAddr base, u64 size) {
const VirtualMemoryArea& vma{vma_handle->second};
if (vma.type == VirtualMemoryArea::Type::Mapped) {
// Region is already allocated
- return {};
+ return vma_handle;
}
const VAddr start_in_vma{base - vma.base};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 178e2f655..647cbf93a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,6 +10,10 @@
#include "common/common_types.h"
#include "common/page_table.h"
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Tegra {
/**
@@ -43,7 +47,7 @@ struct VirtualMemoryArea {
class MemoryManager final {
public:
- MemoryManager();
+ MemoryManager(VideoCore::RasterizerInterface& rasterizer);
GPUVAddr AllocateSpace(u64 size, u64 align);
GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align);
@@ -144,6 +148,7 @@ private:
Common::PageTable page_table{page_bits};
VMAMap vma_map;
+ VideoCore::RasterizerInterface& rasterizer;
};
} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 0aa66fa5b..aa6da1944 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -662,8 +662,8 @@ void CachedSurface::FlushGLBuffer() {
gl_buffer[0].resize(GetSizeInBytes());
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.width * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
+ glPixelStorei(GL_PACK_ALIGNMENT, align);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
ASSERT(!tuple.compressed);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -708,8 +708,8 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
- ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
+ const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
+ glPixelStorei(GL_UNPACK_ALIGNMENT, align);
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index ad4fd3ad2..db280dbb3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -11,6 +11,7 @@
#include <vector>
#include "common/alignment.h"
+#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/hash.h"
#include "common/math_util.h"
@@ -205,6 +206,13 @@ struct SurfaceParams {
return bd;
}
+ u32 RowAlign(u32 mip_level) const { // Returns 1 << (trailing-zero count of the row size in bytes at mip_level)
+ const u32 m_width = MipWidth(mip_level);
+ const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
+ const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); // presumably the index of the lowest set bit of the byte width — confirm helper semantics
+ return (1U << l2); // NOTE(review): a zero row size would presumably make l2 == 32 and this shift out of range — confirm callers never reach that
+ }
+
/// Creates SurfaceParams from a texture configuration
static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
const GLShader::SamplerEntry& entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index ffaff82e5..99f67494c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -40,6 +40,10 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
/// Gets the shader program code from memory for the specified address
ProgramCode GetShaderCode(const u8* host_ptr) {
ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+ ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+ std::fill(program_code.begin(), program_code.end(), 0);
+ return program_code;
+ });
std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
return program_code;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a1a51f226..3ea08ef7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -21,6 +21,8 @@
namespace OpenGL::GLShader {
+namespace {
+
using Tegra::Shader::Attribute;
using Tegra::Shader::AttributeUse;
using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
using Operation = const OperationNode&;
+enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+
+struct TextureAoffi {};
+using TextureArgument = std::pair<Type, Node>;
+using TextureIR = std::variant<TextureAoffi, TextureArgument>;
+
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
-
class ShaderWriter {
public:
void AddExpression(std::string_view text) {
@@ -91,7 +97,7 @@ private:
};
/// Generates code to use for a swizzle operation.
-static std::string GetSwizzle(u32 elem) {
+std::string GetSwizzle(u32 elem) {
ASSERT(elem <= 3);
std::string swizzle = ".";
swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
}
/// Translate topology
-static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
+std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
case Tegra::Shader::OutputTopology::PointList:
return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
}
/// Returns true if an object has to be treated as precise
-static bool IsPrecise(Operation operand) {
+bool IsPrecise(Operation operand) {
const auto& meta = operand.GetMeta();
if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
return false;
}
-static bool IsPrecise(Node node) {
+bool IsPrecise(Node node) {
if (const auto operation = std::get_if<OperationNode>(node)) {
return IsPrecise(*operation);
}
@@ -723,8 +729,8 @@ private:
result_type));
}
- std::string GenerateTexture(Operation operation, const std::string& func,
- const std::vector<std::pair<Type, Node>>& extras) {
+ std::string GenerateTexture(Operation operation, const std::string& function_suffix,
+ const std::vector<TextureIR>& extras) {
constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -734,11 +740,11 @@ private:
const bool has_array = meta->sampler.IsArray();
const bool has_shadow = meta->sampler.IsShadow();
- std::string expr = func;
- expr += '(';
- expr += GetSampler(meta->sampler);
- expr += ", ";
-
+ std::string expr = "texture" + function_suffix;
+ if (!meta->aoffi.empty()) {
+ expr += "Offset";
+ }
+ expr += '(' + GetSampler(meta->sampler) + ", ";
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
@@ -756,36 +762,74 @@ private:
}
expr += ')';
- for (const auto& extra_pair : extras) {
- const auto [type, operand] = extra_pair;
- if (operand == nullptr) {
- continue;
+ for (const auto& variant : extras) {
+ if (const auto argument = std::get_if<TextureArgument>(&variant)) {
+ expr += GenerateTextureArgument(*argument);
+ } else if (std::get_if<TextureAoffi>(&variant)) {
+ expr += GenerateTextureAoffi(meta->aoffi);
+ } else {
+ UNREACHABLE();
}
- expr += ", ";
+ }
- switch (type) {
- case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
- // Inline the string as an immediate integer in GLSL (some extra arguments are
- // required to be constant)
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- expr += "ftoi(" + Visit(operand) + ')';
- }
- break;
- case Type::Float:
- expr += Visit(operand);
- break;
- default: {
- const auto type_int = static_cast<u32>(type);
- UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
- expr += '0';
- break;
+ return expr + ')';
+ }
+
+ std::string GenerateTextureArgument(TextureArgument argument) { // Builds the ", <value>" text for one extra texture call argument
+ const auto [type, operand] = argument;
+ if (operand == nullptr) {
+ return {}; // absent operand contributes nothing, not even the separator
+ }
+
+ std::string expr = ", ";
+ switch (type) {
+ case Type::Int:
+ if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ // Inline the string as an immediate integer in GLSL (some extra arguments are
+ // required to be constant)
+ expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else {
+ expr += "ftoi(" + Visit(operand) + ')'; // non-immediate integers are wrapped in ftoi() around the visited expression
+ }
+ break;
+ case Type::Float:
+ expr += Visit(operand); // floats are emitted as the visited expression unchanged
+ break;
+ default: {
+ const auto type_int = static_cast<u32>(type);
+ UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
+ expr += '0'; // placeholder value appended for unhandled types
+ break;
+ }
+ }
+ return expr;
+ }
+
+ std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
+ if (aoffi.empty()) {
+ return {};
+ }
+ constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+ std::string expr = ", ";
+ expr += coord_constructors.at(aoffi.size() - 1);
+ expr += '(';
+
+ for (std::size_t index = 0; index < aoffi.size(); ++index) {
+ const auto operand{aoffi.at(index)};
+ if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
+ // to be constant by the standard).
+ expr += std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else {
+ expr += "ftoi(" + Visit(operand) + ')';
}
+ if (index + 1 < aoffi.size()) {
+ expr += ", ";
}
}
+ expr += ')';
- return expr + ')';
+ return expr;
}
std::string Assign(Operation operation) {
@@ -1164,7 +1208,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}});
+ std::string expr = GenerateTexture(
+ operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1175,7 +1220,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}});
+ std::string expr = GenerateTexture(
+ operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1187,7 +1233,8 @@ private:
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return GenerateTexture(operation, "textureGather", {{type, meta->component}}) +
+ return GenerateTexture(operation, "Gather",
+ {TextureArgument{type, meta->component}, TextureAoffi{}}) +
GetSwizzle(meta->element);
}
@@ -1217,8 +1264,8 @@ private:
ASSERT(meta);
if (meta->element < 2) {
- return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) +
- " * vec2(256))" + GetSwizzle(meta->element) + "))";
+ return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
+ GetSwizzle(meta->element) + "))";
}
return "0";
}
@@ -1571,6 +1618,8 @@ private:
ShaderWriter code;
};
+} // Anonymous namespace
+
std::string GetCommonDeclarations() {
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index d2d979997..8a43eb157 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -10,8 +10,8 @@
#include "common/common_types.h"
#include "common/file_util.h"
#include "common/logging/log.h"
-#include "common/lz4_compression.h"
#include "common/scm_rev.h"
+#include "common/zstd_compression.h"
#include "core/core.h"
#include "core/hle/kernel/process.h"
@@ -259,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length);
+ dump.binary = Common::Compression::DecompressDataZSTD(compressed_binary);
if (dump.binary.empty()) {
return {};
}
@@ -288,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
return {};
}
- const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size);
+ const std::vector<u8> code = Common::Compression::DecompressDataZSTD(compressed_code);
if (code.empty()) {
return {};
}
@@ -474,8 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str
if (!IsUsable())
return;
- const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC(
- reinterpret_cast<const u8*>(code.data()), code.size(), 9)};
+ const std::vector<u8> compressed_code{Common::Compression::CompressDataZSTDDefault(
+ reinterpret_cast<const u8*>(code.data()), code.size())};
if (compressed_code.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
unique_identifier);
@@ -506,7 +506,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
const std::vector<u8> compressed_binary =
- Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9);
+ Common::Compression::CompressDataZSTDDefault(binary.data(), binary.size());
if (compressed_binary.empty()) {
LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 8eef2a920..37dcfefdb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -62,7 +62,6 @@ public:
UpdatePipeline();
state.draw.shader_program = 0;
state.draw.program_pipeline = pipeline.handle;
- state.geometry_shaders.enabled = (gs != 0);
}
private:
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 9419326a3..52d569a1b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -10,16 +10,62 @@
namespace OpenGL {
-OpenGLState OpenGLState::cur_state;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+OpenGLState OpenGLState::cur_state;
bool OpenGLState::s_rgb_used;
+namespace {
+
+template <typename T>
+bool UpdateValue(T& current_value, const T new_value) { // Stores new_value into current_value and returns whether the two differed
+ const bool changed = current_value != new_value; // compare before the overwrite below
+ current_value = new_value;
+ return changed;
+}
+
+template <typename T1, typename T2>
+bool UpdateTie(T1 current_value, const T2 new_value) { // Compare-then-assign like UpdateValue, but current_value is taken by value
+ const bool changed = current_value != new_value;
+ current_value = new_value; // only reaches caller state if T1 has reference semantics — presumably a std::tie tuple; verify against callers
+ return changed;
+}
+
+void Enable(GLenum cap, bool enable) { // Calls glEnable or glDisable on cap depending on enable
+ if (enable) {
+ glEnable(cap);
+ } else {
+ glDisable(cap);
+ }
+}
+
+void Enable(GLenum cap, GLuint index, bool enable) { // Indexed variant: calls glEnablei or glDisablei for (cap, index)
+ if (enable) {
+ glEnablei(cap, index);
+ } else {
+ glDisablei(cap, index);
+ }
+}
+
+void Enable(GLenum cap, bool& current_value, bool new_value) { // Cached variant: always stores new_value, toggles cap only on change
+ if (UpdateValue(current_value, new_value)) // UpdateValue writes current_value and reports whether it changed
+ Enable(cap, new_value);
+}
+
+void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { // Cached indexed variant: toggles (cap, index) only on change
+ if (UpdateValue(current_value, new_value)) // UpdateValue writes current_value and reports whether it changed
+ Enable(cap, index, new_value);
+}
+
+} // namespace
+
OpenGLState::OpenGLState() {
// These all match default OpenGL values
- geometry_shaders.enabled = false;
framebuffer_srgb.enabled = false;
+
multisample_control.alpha_to_coverage = false;
multisample_control.alpha_to_one = false;
+
cull.enabled = false;
cull.mode = GL_BACK;
cull.front_face = GL_CCW;
@@ -30,14 +76,15 @@ OpenGLState::OpenGLState() {
primitive_restart.enabled = false;
primitive_restart.index = 0;
+
for (auto& item : color_mask) {
item.red_enabled = GL_TRUE;
item.green_enabled = GL_TRUE;
item.blue_enabled = GL_TRUE;
item.alpha_enabled = GL_TRUE;
}
- stencil.test_enabled = false;
- auto reset_stencil = [](auto& config) {
+
+ const auto ResetStencil = [](auto& config) {
config.test_func = GL_ALWAYS;
config.test_ref = 0;
config.test_mask = 0xFFFFFFFF;
@@ -46,8 +93,10 @@ OpenGLState::OpenGLState() {
config.action_depth_pass = GL_KEEP;
config.action_stencil_fail = GL_KEEP;
};
- reset_stencil(stencil.front);
- reset_stencil(stencil.back);
+ stencil.test_enabled = false;
+ ResetStencil(stencil.front);
+ ResetStencil(stencil.back);
+
for (auto& item : viewports) {
item.x = 0;
item.y = 0;
@@ -61,6 +110,7 @@ OpenGLState::OpenGLState() {
item.scissor.width = 0;
item.scissor.height = 0;
}
+
for (auto& item : blend) {
item.enabled = true;
item.rgb_equation = GL_FUNC_ADD;
@@ -70,11 +120,14 @@ OpenGLState::OpenGLState() {
item.src_a_func = GL_ONE;
item.dst_a_func = GL_ZERO;
}
+
independant_blend.enabled = false;
+
blend_color.red = 0.0f;
blend_color.green = 0.0f;
blend_color.blue = 0.0f;
blend_color.alpha = 0.0f;
+
logic_op.enabled = false;
logic_op.operation = GL_COPY;
@@ -91,9 +144,12 @@ OpenGLState::OpenGLState() {
clip_distance = {};
point.size = 1;
+
fragment_color_clamp.enabled = false;
+
depth_clamp.far_plane = false;
depth_clamp.near_plane = false;
+
polygon_offset.fill_enable = false;
polygon_offset.line_enable = false;
polygon_offset.point_enable = false;
@@ -103,260 +159,255 @@ OpenGLState::OpenGLState() {
}
void OpenGLState::ApplyDefaultState() {
+ glEnable(GL_BLEND);
glDisable(GL_FRAMEBUFFER_SRGB);
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glDisable(GL_PRIMITIVE_RESTART);
glDisable(GL_STENCIL_TEST);
- glEnable(GL_BLEND);
glDisable(GL_COLOR_LOGIC_OP);
glDisable(GL_SCISSOR_TEST);
}
+void OpenGLState::ApplyFramebufferState() const { // Binds the read and draw framebuffers that differ from cur_state
+ if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { // also records the new handle in cur_state
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
+ }
+ if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { // also records the new handle in cur_state
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
+ }
+}
+
+void OpenGLState::ApplyVertexArrayState() const { // Binds draw.vertex_array when it differs from cur_state
+ if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { // also records the new handle in cur_state
+ glBindVertexArray(draw.vertex_array);
+ }
+}
+
+void OpenGLState::ApplyShaderProgram() const { // Calls glUseProgram when draw.shader_program differs from cur_state
+ if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { // also records the new handle in cur_state
+ glUseProgram(draw.shader_program);
+ }
+}
+
+void OpenGLState::ApplyProgramPipeline() const { // Binds draw.program_pipeline when it differs from cur_state
+ if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { // also records the new handle in cur_state
+ glBindProgramPipeline(draw.program_pipeline);
+ }
+}
+
+void OpenGLState::ApplyClipDistances() const { // Toggles each GL_CLIP_DISTANCE0 + i whose flag differs from cur_state
+ for (std::size_t i = 0; i < clip_distance.size(); ++i) {
+ Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
+ clip_distance[i]); // the cached Enable overload also writes the new flag into cur_state
+ }
+}
+
+void OpenGLState::ApplyPointSize() const { // Calls glPointSize when point.size differs from cur_state
+ if (UpdateValue(cur_state.point.size, point.size)) { // also records the new size in cur_state
+ glPointSize(point.size);
+ }
+}
+
+void OpenGLState::ApplyFragmentColorClamp() const { // Updates fragment color clamping when the flag differs from cur_state
+ if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { // also records the new flag in cur_state
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
+ fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
+ }
+}
+
+void OpenGLState::ApplyMultisample() const { // Toggles the two sample-alpha caps whose flags differ from cur_state
+ Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
+ multisample_control.alpha_to_coverage); // cached Enable overload: updates cur_state and toggles only on change
+ Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
+ multisample_control.alpha_to_one); // cached Enable overload: updates cur_state and toggles only on change
+}
+
+void OpenGLState::ApplyDepthClamp() const { // Applies GL_DEPTH_CLAMP when either plane flag differs from cur_state
+ if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
+ depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
+ return; // both flags already match cur_state; no GL call is made
+ }
+ cur_state.depth_clamp = depth_clamp;
+
+ UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
+ "Unimplemented Depth Clamp Separation!");
+
+ Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); // one cap is toggled for both planes: on if either flag is set
+}
+
void OpenGLState::ApplySRgb() const {
- if (framebuffer_srgb.enabled != cur_state.framebuffer_srgb.enabled) {
- if (framebuffer_srgb.enabled) {
- // Track if sRGB is used
- s_rgb_used = true;
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
- }
+ if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
+ return;
+ cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
+ if (framebuffer_srgb.enabled) {
+ // Track if sRGB is used
+ s_rgb_used = true;
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
}
}
void OpenGLState::ApplyCulling() const {
- if (cull.enabled != cur_state.cull.enabled) {
- if (cull.enabled) {
- glEnable(GL_CULL_FACE);
- } else {
- glDisable(GL_CULL_FACE);
- }
- }
+ Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
- if (cull.mode != cur_state.cull.mode) {
+ if (UpdateValue(cur_state.cull.mode, cull.mode)) {
glCullFace(cull.mode);
}
- if (cull.front_face != cur_state.cull.front_face) {
+ if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
glFrontFace(cull.front_face);
}
}
void OpenGLState::ApplyColorMask() const {
- if (independant_blend.enabled) {
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- const auto& updated = color_mask[i];
- const auto& current = cur_state.color_mask[i];
- if (updated.red_enabled != current.red_enabled ||
- updated.green_enabled != current.green_enabled ||
- updated.blue_enabled != current.blue_enabled ||
- updated.alpha_enabled != current.alpha_enabled) {
- glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
- updated.blue_enabled, updated.alpha_enabled);
- }
- }
- } else {
- const auto& updated = color_mask[0];
- const auto& current = cur_state.color_mask[0];
+ for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+ const auto& updated = color_mask[i];
+ auto& current = cur_state.color_mask[i];
if (updated.red_enabled != current.red_enabled ||
updated.green_enabled != current.green_enabled ||
updated.blue_enabled != current.blue_enabled ||
updated.alpha_enabled != current.alpha_enabled) {
- glColorMask(updated.red_enabled, updated.green_enabled, updated.blue_enabled,
- updated.alpha_enabled);
+ current = updated;
+ glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
+ updated.blue_enabled, updated.alpha_enabled);
}
}
}
void OpenGLState::ApplyDepth() const {
- if (depth.test_enabled != cur_state.depth.test_enabled) {
- if (depth.test_enabled) {
- glEnable(GL_DEPTH_TEST);
- } else {
- glDisable(GL_DEPTH_TEST);
- }
- }
+ Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
- if (depth.test_func != cur_state.depth.test_func) {
+ if (cur_state.depth.test_func != depth.test_func) {
+ cur_state.depth.test_func = depth.test_func;
glDepthFunc(depth.test_func);
}
- if (depth.write_mask != cur_state.depth.write_mask) {
+ if (cur_state.depth.write_mask != depth.write_mask) {
+ cur_state.depth.write_mask = depth.write_mask;
glDepthMask(depth.write_mask);
}
}
void OpenGLState::ApplyPrimitiveRestart() const {
- if (primitive_restart.enabled != cur_state.primitive_restart.enabled) {
- if (primitive_restart.enabled) {
- glEnable(GL_PRIMITIVE_RESTART);
- } else {
- glDisable(GL_PRIMITIVE_RESTART);
- }
- }
+ Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
- if (primitive_restart.index != cur_state.primitive_restart.index) {
+ if (cur_state.primitive_restart.index != primitive_restart.index) {
+ cur_state.primitive_restart.index = primitive_restart.index;
glPrimitiveRestartIndex(primitive_restart.index);
}
}
void OpenGLState::ApplyStencilTest() const {
- if (stencil.test_enabled != cur_state.stencil.test_enabled) {
- if (stencil.test_enabled) {
- glEnable(GL_STENCIL_TEST);
- } else {
- glDisable(GL_STENCIL_TEST);
- }
- }
-
- const auto ConfigStencil = [](GLenum face, const auto& config, const auto& prev_config) {
- if (config.test_func != prev_config.test_func || config.test_ref != prev_config.test_ref ||
- config.test_mask != prev_config.test_mask) {
+ Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
+
+ const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
+ if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
+ current.test_mask != config.test_mask) {
+ current.test_func = config.test_func;
+ current.test_ref = config.test_ref;
+ current.test_mask = config.test_mask;
glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
}
- if (config.action_depth_fail != prev_config.action_depth_fail ||
- config.action_depth_pass != prev_config.action_depth_pass ||
- config.action_stencil_fail != prev_config.action_stencil_fail) {
+ if (current.action_depth_fail != config.action_depth_fail ||
+ current.action_depth_pass != config.action_depth_pass ||
+ current.action_stencil_fail != config.action_stencil_fail) {
+ current.action_depth_fail = config.action_depth_fail;
+ current.action_depth_pass = config.action_depth_pass;
+ current.action_stencil_fail = config.action_stencil_fail;
glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
config.action_depth_pass);
}
- if (config.write_mask != prev_config.write_mask) {
+ if (current.write_mask != config.write_mask) {
+ current.write_mask = config.write_mask;
glStencilMaskSeparate(face, config.write_mask);
}
};
ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
}
-// Viewport does not affects glClearBuffer so emulate viewport using scissor test
-void OpenGLState::EmulateViewportWithScissor() {
- auto& current = viewports[0];
- if (current.scissor.enabled) {
- const GLint left = std::max(current.x, current.scissor.x);
- const GLint right =
- std::max(current.x + current.width, current.scissor.x + current.scissor.width);
- const GLint bottom = std::max(current.y, current.scissor.y);
- const GLint top =
- std::max(current.y + current.height, current.scissor.y + current.scissor.height);
- current.scissor.x = std::max(left, 0);
- current.scissor.y = std::max(bottom, 0);
- current.scissor.width = std::max(right - left, 0);
- current.scissor.height = std::max(top - bottom, 0);
- } else {
- current.scissor.enabled = true;
- current.scissor.x = current.x;
- current.scissor.y = current.y;
- current.scissor.width = current.width;
- current.scissor.height = current.height;
- }
-}
void OpenGLState::ApplyViewport() const {
- if (geometry_shaders.enabled) {
- for (GLuint i = 0; i < static_cast<GLuint>(Tegra::Engines::Maxwell3D::Regs::NumViewports);
- i++) {
- const auto& current = cur_state.viewports[i];
- const auto& updated = viewports[i];
- if (updated.x != current.x || updated.y != current.y ||
- updated.width != current.width || updated.height != current.height) {
- glViewportIndexedf(
- i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
- static_cast<GLfloat>(updated.width), static_cast<GLfloat>(updated.height));
- }
- if (updated.depth_range_near != current.depth_range_near ||
- updated.depth_range_far != current.depth_range_far) {
- glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
- }
-
- if (updated.scissor.enabled != current.scissor.enabled) {
- if (updated.scissor.enabled) {
- glEnablei(GL_SCISSOR_TEST, i);
- } else {
- glDisablei(GL_SCISSOR_TEST, i);
- }
- }
-
- if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
- updated.scissor.width != current.scissor.width ||
- updated.scissor.height != current.scissor.height) {
- glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
- }
- }
- } else {
- const auto& current = cur_state.viewports[0];
- const auto& updated = viewports[0];
- if (updated.x != current.x || updated.y != current.y || updated.width != current.width ||
- updated.height != current.height) {
- glViewport(updated.x, updated.y, updated.width, updated.height);
- }
-
- if (updated.depth_range_near != current.depth_range_near ||
- updated.depth_range_far != current.depth_range_far) {
- glDepthRange(updated.depth_range_near, updated.depth_range_far);
+ for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
+ const auto& updated = viewports[i];
+ auto& current = cur_state.viewports[i];
+
+ if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
+ current.height != updated.height) {
+ current.x = updated.x;
+ current.y = updated.y;
+ current.width = updated.width;
+ current.height = updated.height;
+ glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
+ static_cast<GLfloat>(updated.width),
+ static_cast<GLfloat>(updated.height));
}
-
- if (updated.scissor.enabled != current.scissor.enabled) {
- if (updated.scissor.enabled) {
- glEnable(GL_SCISSOR_TEST);
- } else {
- glDisable(GL_SCISSOR_TEST);
- }
+ if (current.depth_range_near != updated.depth_range_near ||
+ current.depth_range_far != updated.depth_range_far) {
+ current.depth_range_near = updated.depth_range_near;
+ current.depth_range_far = updated.depth_range_far;
+ glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
}
- if (updated.scissor.x != current.scissor.x || updated.scissor.y != current.scissor.y ||
- updated.scissor.width != current.scissor.width ||
- updated.scissor.height != current.scissor.height) {
- glScissor(updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
+ Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
+
+ if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
+ current.scissor.width != updated.scissor.width ||
+ current.scissor.height != updated.scissor.height) {
+ current.scissor.x = updated.scissor.x;
+ current.scissor.y = updated.scissor.y;
+ current.scissor.width = updated.scissor.width;
+ current.scissor.height = updated.scissor.height;
+ glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
+ updated.scissor.height);
}
}
}
void OpenGLState::ApplyGlobalBlending() const {
- const Blend& current = cur_state.blend[0];
const Blend& updated = blend[0];
- if (updated.enabled != current.enabled) {
- if (updated.enabled) {
- glEnable(GL_BLEND);
- } else {
- glDisable(GL_BLEND);
- }
- }
- if (!updated.enabled) {
- return;
- }
- if (updated.src_rgb_func != current.src_rgb_func ||
- updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
- updated.dst_a_func != current.dst_a_func) {
+ Blend& current = cur_state.blend[0];
+
+ Enable(GL_BLEND, current.enabled, updated.enabled);
+
+ if (current.src_rgb_func != updated.src_rgb_func ||
+ current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
+ current.dst_a_func != updated.dst_a_func) {
+ current.src_rgb_func = updated.src_rgb_func;
+ current.dst_rgb_func = updated.dst_rgb_func;
+ current.src_a_func = updated.src_a_func;
+ current.dst_a_func = updated.dst_a_func;
glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
updated.dst_a_func);
}
- if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+ if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
+ current.rgb_equation = updated.rgb_equation;
+ current.a_equation = updated.a_equation;
glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
}
}
void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
const Blend& updated = blend[target];
- const Blend& current = cur_state.blend[target];
- if (updated.enabled != current.enabled || force) {
- if (updated.enabled) {
- glEnablei(GL_BLEND, static_cast<GLuint>(target));
- } else {
- glDisablei(GL_BLEND, static_cast<GLuint>(target));
- }
+ Blend& current = cur_state.blend[target];
+
+ if (current.enabled != updated.enabled || force) {
+ current.enabled = updated.enabled;
+ Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
}
- if (updated.src_rgb_func != current.src_rgb_func ||
- updated.dst_rgb_func != current.dst_rgb_func || updated.src_a_func != current.src_a_func ||
- updated.dst_a_func != current.dst_a_func) {
+ if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
+ current.dst_a_func),
+ std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
+ updated.dst_a_func))) {
glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
}
- if (updated.rgb_equation != current.rgb_equation || updated.a_equation != current.a_equation) {
+ if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
+ std::tie(updated.rgb_equation, updated.a_equation))) {
glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
updated.a_equation);
}
@@ -364,77 +415,48 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
void OpenGLState::ApplyBlending() const {
if (independant_blend.enabled) {
- for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- ApplyTargetBlending(i,
- independant_blend.enabled != cur_state.independant_blend.enabled);
+ const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
+ for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
+ ApplyTargetBlending(target, force);
}
} else {
ApplyGlobalBlending();
}
- if (blend_color.red != cur_state.blend_color.red ||
- blend_color.green != cur_state.blend_color.green ||
- blend_color.blue != cur_state.blend_color.blue ||
- blend_color.alpha != cur_state.blend_color.alpha) {
+ cur_state.independant_blend.enabled = independant_blend.enabled;
+
+ if (UpdateTie(
+ std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
+ cur_state.blend_color.blue, cur_state.blend_color.alpha),
+ std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
}
}
void OpenGLState::ApplyLogicOp() const {
- if (logic_op.enabled != cur_state.logic_op.enabled) {
- if (logic_op.enabled) {
- glEnable(GL_COLOR_LOGIC_OP);
- } else {
- glDisable(GL_COLOR_LOGIC_OP);
- }
- }
+ Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
- if (logic_op.operation != cur_state.logic_op.operation) {
+ if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
glLogicOp(logic_op.operation);
}
}
void OpenGLState::ApplyPolygonOffset() const {
- const bool fill_enable_changed =
- polygon_offset.fill_enable != cur_state.polygon_offset.fill_enable;
- const bool line_enable_changed =
- polygon_offset.line_enable != cur_state.polygon_offset.line_enable;
- const bool point_enable_changed =
- polygon_offset.point_enable != cur_state.polygon_offset.point_enable;
- const bool factor_changed = polygon_offset.factor != cur_state.polygon_offset.factor;
- const bool units_changed = polygon_offset.units != cur_state.polygon_offset.units;
- const bool clamp_changed = polygon_offset.clamp != cur_state.polygon_offset.clamp;
-
- if (fill_enable_changed) {
- if (polygon_offset.fill_enable) {
- glEnable(GL_POLYGON_OFFSET_FILL);
- } else {
- glDisable(GL_POLYGON_OFFSET_FILL);
- }
- }
-
- if (line_enable_changed) {
- if (polygon_offset.line_enable) {
- glEnable(GL_POLYGON_OFFSET_LINE);
- } else {
- glDisable(GL_POLYGON_OFFSET_LINE);
- }
- }
-
- if (point_enable_changed) {
- if (polygon_offset.point_enable) {
- glEnable(GL_POLYGON_OFFSET_POINT);
- } else {
- glDisable(GL_POLYGON_OFFSET_POINT);
- }
- }
-
- if (factor_changed || units_changed || clamp_changed) {
+ Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
+ polygon_offset.fill_enable);
+ Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
+ polygon_offset.line_enable);
+ Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
+ polygon_offset.point_enable);
+
+ if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
+ cur_state.polygon_offset.clamp),
+ std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
} else {
- glPolygonOffset(polygon_offset.factor, polygon_offset.units);
UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
"Unimplemented Depth polygon offset clamp.");
+ glPolygonOffset(polygon_offset.factor, polygon_offset.units);
}
}
}
@@ -443,22 +465,21 @@ void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
+ std::array<GLuint, Maxwell::NumTextureSamplers> textures;
for (std::size_t i = 0; i < std::size(texture_units); ++i) {
const auto& texture_unit = texture_units[i];
- const auto& cur_state_texture_unit = cur_state.texture_units[i];
+ auto& cur_state_texture_unit = cur_state.texture_units[i];
textures[i] = texture_unit.texture;
-
- if (textures[i] != cur_state_texture_unit.texture) {
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (cur_state_texture_unit.texture == textures[i])
+ continue;
+ cur_state_texture_unit.texture = textures[i];
+ if (!has_delta) {
+ first = i;
+ has_delta = true;
}
+ last = i;
}
-
if (has_delta) {
glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
textures.data() + first);
@@ -469,16 +490,18 @@ void OpenGLState::ApplySamplers() const {
bool has_delta{};
std::size_t first{};
std::size_t last{};
- std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
+ std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
+
for (std::size_t i = 0; i < std::size(samplers); ++i) {
+ if (cur_state.texture_units[i].sampler == texture_units[i].sampler)
+ continue;
+ cur_state.texture_units[i].sampler = texture_units[i].sampler;
samplers[i] = texture_units[i].sampler;
- if (samplers[i] != cur_state.texture_units[i].sampler) {
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (!has_delta) {
+ first = i;
+ has_delta = true;
}
+ last = i;
}
if (has_delta) {
glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
@@ -486,81 +509,15 @@ void OpenGLState::ApplySamplers() const {
}
}
-void OpenGLState::ApplyFramebufferState() const {
- if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
- glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
- }
- if (draw.draw_framebuffer != cur_state.draw.draw_framebuffer) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
- }
-}
-
-void OpenGLState::ApplyVertexArrayState() const {
- if (draw.vertex_array != cur_state.draw.vertex_array) {
- glBindVertexArray(draw.vertex_array);
- }
-}
-
-void OpenGLState::ApplyDepthClamp() const {
- if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
- depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
- return;
- }
- UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
- "Unimplemented Depth Clamp Separation!");
-
- if (depth_clamp.far_plane || depth_clamp.near_plane) {
- glEnable(GL_DEPTH_CLAMP);
- } else {
- glDisable(GL_DEPTH_CLAMP);
- }
-}
-
void OpenGLState::Apply() const {
ApplyFramebufferState();
ApplyVertexArrayState();
-
- // Shader program
- if (draw.shader_program != cur_state.draw.shader_program) {
- glUseProgram(draw.shader_program);
- }
-
- // Program pipeline
- if (draw.program_pipeline != cur_state.draw.program_pipeline) {
- glBindProgramPipeline(draw.program_pipeline);
- }
- // Clip distance
- for (std::size_t i = 0; i < clip_distance.size(); ++i) {
- if (clip_distance[i] != cur_state.clip_distance[i]) {
- if (clip_distance[i]) {
- glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
- } else {
- glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
- }
- }
- }
- // Point
- if (point.size != cur_state.point.size) {
- glPointSize(point.size);
- }
- if (fragment_color_clamp.enabled != cur_state.fragment_color_clamp.enabled) {
- glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
- fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
- }
- if (multisample_control.alpha_to_coverage != cur_state.multisample_control.alpha_to_coverage) {
- if (multisample_control.alpha_to_coverage) {
- glEnable(GL_SAMPLE_ALPHA_TO_COVERAGE);
- } else {
- glDisable(GL_SAMPLE_ALPHA_TO_COVERAGE);
- }
- }
- if (multisample_control.alpha_to_one != cur_state.multisample_control.alpha_to_one) {
- if (multisample_control.alpha_to_one) {
- glEnable(GL_SAMPLE_ALPHA_TO_ONE);
- } else {
- glDisable(GL_SAMPLE_ALPHA_TO_ONE);
- }
- }
+ ApplyShaderProgram();
+ ApplyProgramPipeline();
+ ApplyClipDistances();
+ ApplyPointSize();
+ ApplyFragmentColorClamp();
+ ApplyMultisample();
ApplyDepthClamp();
ApplyColorMask();
ApplyViewport();
@@ -574,7 +531,28 @@ void OpenGLState::Apply() const {
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
- cur_state = *this;
+}
+
+void OpenGLState::EmulateViewportWithScissor() {
+ auto& current = viewports[0];
+ if (current.scissor.enabled) {
+ const GLint left = std::max(current.x, current.scissor.x);
+ const GLint right =
+ std::max(current.x + current.width, current.scissor.x + current.scissor.width);
+ const GLint bottom = std::max(current.y, current.scissor.y);
+ const GLint top =
+ std::max(current.y + current.height, current.scissor.y + current.scissor.height);
+ current.scissor.x = std::max(left, 0);
+ current.scissor.y = std::max(bottom, 0);
+ current.scissor.width = std::max(right - left, 0);
+ current.scissor.height = std::max(top - bottom, 0);
+ } else {
+ current.scissor.enabled = true;
+ current.scissor.x = current.x;
+ current.scissor.y = current.y;
+ current.scissor.width = current.width;
+ current.scissor.height = current.height;
+ }
}
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 9e1eda5b1..41418a7b8 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -54,10 +54,6 @@ public:
} depth_clamp; // GL_DEPTH_CLAMP
struct {
- bool enabled; // viewports arrays are only supported when geometry shaders are enabled.
- } geometry_shaders;
-
- struct {
bool enabled; // GL_CULL_FACE
GLenum mode; // GL_CULL_FACE_MODE
GLenum front_face; // GL_FRONT_FACE
@@ -184,34 +180,26 @@ public:
static OpenGLState GetCurState() {
return cur_state;
}
+
static bool GetsRGBUsed() {
return s_rgb_used;
}
+
static void ClearsRGBUsed() {
s_rgb_used = false;
}
+
/// Apply this state as the current OpenGL state
void Apply() const;
- /// Apply only the state affecting the framebuffer
+
void ApplyFramebufferState() const;
- /// Apply only the state affecting the vertex array
void ApplyVertexArrayState() const;
- /// Set the initial OpenGL state
- static void ApplyDefaultState();
- /// Resets any references to the given resource
- OpenGLState& UnbindTexture(GLuint handle);
- OpenGLState& ResetSampler(GLuint handle);
- OpenGLState& ResetProgram(GLuint handle);
- OpenGLState& ResetPipeline(GLuint handle);
- OpenGLState& ResetVertexArray(GLuint handle);
- OpenGLState& ResetFramebuffer(GLuint handle);
- void EmulateViewportWithScissor();
-
-private:
- static OpenGLState cur_state;
- // Workaround for sRGB problems caused by
- // QT not supporting srgb output
- static bool s_rgb_used;
+ void ApplyShaderProgram() const;
+ void ApplyProgramPipeline() const;
+ void ApplyClipDistances() const;
+ void ApplyPointSize() const;
+ void ApplyFragmentColorClamp() const;
+ void ApplyMultisample() const;
void ApplySRgb() const;
void ApplyCulling() const;
void ApplyColorMask() const;
@@ -227,6 +215,26 @@ private:
void ApplySamplers() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
+
+ /// Set the initial OpenGL state
+ static void ApplyDefaultState();
+
+ /// Resets any references to the given resource
+ OpenGLState& UnbindTexture(GLuint handle);
+ OpenGLState& ResetSampler(GLuint handle);
+ OpenGLState& ResetProgram(GLuint handle);
+ OpenGLState& ResetPipeline(GLuint handle);
+ OpenGLState& ResetVertexArray(GLuint handle);
+ OpenGLState& ResetFramebuffer(GLuint handle);
+
+ /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
+ void EmulateViewportWithScissor();
+
+private:
+ static OpenGLState cur_state;
+
+ // Workaround for sRGB problems caused by QT not supporting srgb output
+ static bool s_rgb_used;
};
} // namespace OpenGL
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a99ae19bf..a775b402b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -7,7 +7,9 @@
#include <fmt/format.h>
#include "common/assert.h"
+#include "common/bit_field.h"
#include "common/common_types.h"
+#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/shader_ir.h"
@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
- UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
-
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex.texture_type};
const bool is_array = instr.tex.array != 0;
+ const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
- bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
+ bb, instr,
+ GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
break;
}
case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.array == 0);
- UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
- "AOFFI is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
- WriteTexInstructionFloat(bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array));
+ const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+ WriteTexInstructionFloat(
+ bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
break;
}
case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
+ MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
for (u32 element = 0; element < 2; ++element) {
auto params = coords;
- MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
}
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset) {
+ Node array, Node depth_compare, u32 bias_offset,
+ std::vector<Node> aoffi) {
const bool is_array = array;
const bool is_shadow = depth_compare;
@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
- MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
+ MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
- TextureProcessMode process_mode, bool depth_compare, bool is_array) {
- const bool lod_bias_enabled =
- (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
+ TextureProcessMode process_mode, bool depth_compare, bool is_array,
+ bool is_aoffi) {
+ const bool lod_bias_enabled{
+ (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
+
+ u64 parameter_register = instr.gpr20.Value();
+ if (lod_bias_enabled) {
+ ++parameter_register;
+ }
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
const Node array = is_array ? GetRegister(array_register) : nullptr;
+ std::vector<Node> aoffi;
+ if (is_aoffi) {
+ aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
+ }
+
Node dc{};
if (depth_compare) {
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
// or bias are used
- const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
- dc = GetRegister(depth_register);
+ dc = GetRegister(parameter_register++);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
dc = GetRegister(depth_register);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array) {
+ bool is_array, bool is_aoffi) {
const std::size_t coord_count = GetCoordCount(texture_type);
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
const u64 coord_register = array_register + (is_array ? 1 : 0);
std::vector<Node> coords;
- for (size_t i = 0; i < coord_count; ++i)
+ for (std::size_t i = 0; i < coord_count; ++i) {
coords.push_back(GetRegister(coord_register + i));
+ }
+
+ u64 parameter_register = instr.gpr20.Value();
+ std::vector<Node> aoffi;
+ if (is_aoffi) {
+ aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+ }
+
+ Node dc{};
+ if (depth_compare) {
+ dc = GetRegister(parameter_register++);
+ }
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
+ MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
return {coord_count, total_coord_count};
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
+ bool is_tld4) {
+ const auto [coord_offsets, size, wrap_value,
+ diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
+ if (is_tld4) {
+ return {{0, 8, 16}, 6, 32, 64};
+ } else {
+ return {{0, 4, 8}, 4, 8, 16};
+ }
+ }();
+ const u32 mask = (1U << size) - 1;
+
+ std::vector<Node> aoffi;
+ aoffi.reserve(coord_count);
+
+ const auto aoffi_immediate{
+ TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
+ if (!aoffi_immediate) {
+ // Variable access, not supported on AMD.
+ LOG_WARNING(HW_GPU,
+ "AOFFI constant folding failed, some hardware might have graphical issues");
+ for (std::size_t coord = 0; coord < coord_count; ++coord) {
+ const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
+ const Node condition =
+ Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
+ const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
+ aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
+ }
+ return aoffi;
+ }
+
+ for (std::size_t coord = 0; coord < coord_count; ++coord) {
+ s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
+ if (value >= wrap_value) {
+ value -= diff_value;
+ }
+ aoffi.push_back(Immediate(value));
+ }
+ return aoffi;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index c34843307..db15c0718 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -29,39 +29,55 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
const bool is_signed_b = instr.xmad.sign_b == 1;
const bool is_signed_c = is_signed_a;
- auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
+ auto [is_merge, is_psl, is_high_b, mode, op_b,
+ op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR:
return {instr.xmad.merge_56,
+ instr.xmad.product_shift_left_second,
+ instr.xmad.high_b,
+ instr.xmad.mode_cbf,
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RR:
- return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+ return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
+ instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RC:
- return {false, GetRegister(instr.gpr39),
+ return {false,
+ false,
+ instr.xmad.high_b,
+ instr.xmad.mode_cbf,
+ GetRegister(instr.gpr39),
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::XMAD_IMM:
- return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
+ return {instr.xmad.merge_37,
+ instr.xmad.product_shift_left,
+ false,
+ instr.xmad.mode,
+ Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};
}
UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
- return {false, Immediate(0), Immediate(0)};
+ return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
}();
op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
const Node original_b = op_b;
- op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);
+ op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16);
// TODO(Rodrigo): Use an appropriate sign for this operation
Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
- if (instr.xmad.product_shift_left) {
+ if (is_psl) {
product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
}
+ SetTemporal(bb, 0, product);
+ product = GetTemporal(0);
const Node original_c = op_c;
+ const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
op_c = [&]() {
- switch (instr.xmad.mode) {
+ switch (set_mode) {
case Tegra::Shader::XmadMode::None:
return original_c;
case Tegra::Shader::XmadMode::CLo:
@@ -80,8 +96,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
}
}();
+ SetTemporal(bb, 1, op_c);
+ op_c = GetTemporal(1);
+
// TODO(Rodrigo): Use an appropriate sign for this operation
Node sum = Operation(OperationCode::IAdd, product, op_c);
+ SetTemporal(bb, 2, sum);
+ sum = GetTemporal(2);
if (is_merge) {
const Node a = BitfieldExtract(sum, 0, 16);
const Node b =
@@ -95,4 +116,4 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
return pc;
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 5bc3a3900..4888998d3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstring>
#include <map>
+#include <optional>
#include <set>
#include <string>
#include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
const Sampler& sampler;
Node array{};
Node depth_compare{};
+ std::vector<Node> aoffi;
Node bias{};
Node lod{};
Node component{};
@@ -741,14 +743,14 @@ private:
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array);
+ bool is_array, bool is_aoffi);
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array);
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool depth_compare, bool is_array);
+ bool depth_compare, bool is_array, bool is_aoffi);
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
bool is_array);
@@ -757,9 +759,11 @@ private:
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
+ std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
+
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset);
+ Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);
@@ -773,6 +777,8 @@ private:
Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
+ std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
+
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
template <typename... T>
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 33b071747..4505667ff 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -6,6 +6,7 @@
#include <utility>
#include <variant>
+#include "common/common_types.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) {
for (; cursor >= 0; --cursor) {
- const Node node = code[cursor];
+ const Node node = code.at(cursor);
if (const auto operation = std::get_if<OperationNode>(node)) {
if (operation->GetCode() == operation_code)
return {node, cursor};
@@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
return nullptr;
}
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+ // Reduce the cursor by one to avoid infinite loops when the instruction sets the same register
+ // that it uses as an operand
+ const auto [found, found_cursor] =
+ TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
+ if (!found) {
+ return {};
+ }
+ if (const auto immediate = std::get_if<ImmediateNode>(found)) {
+ return immediate->GetValue();
+ }
+ return {};
+}
+
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
s64 cursor) {
for (; cursor >= 0; --cursor) {