Diffstat (limited to 'src/video_core')
-rw-r--r--  src/video_core/engines/maxwell_3d.h                     |  15
-rw-r--r--  src/video_core/engines/shader_bytecode.h                | 111
-rw-r--r--  src/video_core/gpu.cpp                                  |   1
-rw-r--r--  src/video_core/gpu.h                                    |   1
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp        |  19
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.cpp  |   3
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.h    |  98
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 331
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_gen.h          |  50
-rw-r--r--  src/video_core/renderer_opengl/maxwell_to_gl.h          |   2
10 files changed, 493 insertions, 138 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 771eb5abc..3c869d3a1 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -679,7 +679,19 @@ public:
INSERT_PADDING_WORDS(0x7);
- INSERT_PADDING_WORDS(0x46);
+ INSERT_PADDING_WORDS(0x20);
+
+ struct {
+ u32 is_instanced[NumVertexArrays];
+
+ /// Returns whether the vertex array specified by index is supposed to be
+ /// accessed per instance or not.
+ bool IsInstancingEnabled(u32 index) const {
+ return is_instanced[index];
+ }
+ } instanced_arrays;
+
+ INSERT_PADDING_WORDS(0x6);
Cull cull;
@@ -928,6 +940,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
+ASSERT_REG_POSITION(instanced_arrays, 0x620);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(clear_buffers, 0x674);
ASSERT_REG_POSITION(query, 0x6C0);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index b038a9d92..3ba6fe614 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -12,6 +12,7 @@
#include <boost/optional.hpp>
+#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
@@ -79,6 +80,9 @@ union Attribute {
// shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
// shader.
TessCoordInstanceIDVertexID = 47,
+ // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
+ // shader. It is unknown what the other values contain.
+ FrontFacing = 63,
};
union {
@@ -214,6 +218,18 @@ enum class FlowCondition : u64 {
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
};
+enum class PredicateResultMode : u64 {
+ None = 0x0,
+ NotZero = 0x3,
+};
+
+enum class TextureType : u64 {
+ Texture1D = 0,
+ Texture2D = 1,
+ Texture3D = 2,
+ TextureCube = 3,
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -254,7 +270,7 @@ union Instruction {
BitField<39, 1, u64> invert_a;
BitField<40, 1, u64> invert_b;
BitField<41, 2, LogicOperation> operation;
- BitField<44, 2, u64> unk44;
+ BitField<44, 2, PredicateResultMode> pred_result_mode;
BitField<48, 3, Pred> pred48;
} lop;
@@ -284,6 +300,10 @@ union Instruction {
} alu;
union {
+ BitField<48, 1, u64> negate_b;
+ } fmul;
+
+ union {
BitField<48, 1, u64> is_signed;
} shift;
@@ -421,6 +441,8 @@ union Instruction {
} conversion;
union {
+ BitField<28, 1, u64> array;
+ BitField<29, 2, TextureType> texture_type;
BitField<31, 4, u64> component_mask;
bool IsComponentEnabled(size_t component) const {
@@ -429,29 +451,88 @@ union Instruction {
} tex;
union {
- BitField<50, 3, u64> component_mask_selector;
+ BitField<28, 1, u64> array;
+ BitField<29, 2, TextureType> texture_type;
+ BitField<56, 2, u64> component;
+ } tld4;
+
+ union {
+ BitField<52, 2, u64> component;
+ } tld4s;
+
+ union {
BitField<0, 8, Register> gpr0;
BitField<28, 8, Register> gpr28;
+ BitField<50, 3, u64> component_mask_selector;
+ BitField<53, 4, u64> texture_info;
+
+ TextureType GetTextureType() const {
+ // The TEXS instruction has a weird encoding for the texture type.
+ if (texture_info == 0)
+ return TextureType::Texture1D;
+ if (texture_info >= 1 && texture_info <= 9)
+ return TextureType::Texture2D;
+ if (texture_info >= 10 && texture_info <= 11)
+ return TextureType::Texture3D;
+ if (texture_info >= 12 && texture_info <= 13)
+ return TextureType::TextureCube;
+
+ UNIMPLEMENTED();
+ }
+
+ bool IsArrayTexture() const {
+ // TEXS only supports Texture2D arrays.
+ return texture_info >= 7 && texture_info <= 9;
+ }
bool HasTwoDestinations() const {
return gpr28.Value() != Register::ZeroIndex;
}
bool IsComponentEnabled(size_t component) const {
- static constexpr std::array<std::array<u32, 8>, 4> mask_lut{
- {{},
- {0x1, 0x2, 0x4, 0x8, 0x3},
- {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
- {0x7, 0xb, 0xd, 0xe, 0xf}}};
+ static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
+ {},
+ {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+ {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
+ {0x7, 0xb, 0xd, 0xe, 0xf},
+ }};
size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
- return ((1ull << component) & mask_lut[index][component_mask_selector]) != 0;
+ u32 mask = mask_lut[index][component_mask_selector];
+ // A mask of 0 means this instruction uses an unimplemented mask.
+ ASSERT(mask != 0);
+ return ((1ull << component) & mask) != 0;
}
} texs;
union {
+ BitField<53, 4, u64> texture_info;
+
+ TextureType GetTextureType() const {
+ // The TLDS instruction has a weird encoding for the texture type.
+ if (texture_info >= 0 && texture_info <= 1) {
+ return TextureType::Texture1D;
+ }
+ if (texture_info == 2 || texture_info == 8 || texture_info == 12 ||
+ texture_info >= 4 && texture_info <= 6) {
+ return TextureType::Texture2D;
+ }
+ if (texture_info == 7) {
+ return TextureType::Texture3D;
+ }
+
+ UNIMPLEMENTED();
+ }
+
+ bool IsArrayTexture() const {
+ // TLDS only supports Texture2D arrays.
+ return texture_info == 8;
+ }
+ } tlds;
+
+ union {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
@@ -513,10 +594,14 @@ public:
LD_A,
LD_C,
ST_A,
+ LDG, // Load from global memory
+ STG, // Store in global memory
TEX,
- TEXQ, // Texture Query
- TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
- TLDS, // Texture Load with scalar/non-vec4 source/destinations
+ TEXQ, // Texture Query
+ TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
+ TLDS, // Texture Load with scalar/non-vec4 source/destinations
+ TLD4, // Texture Load 4
+ TLD4S, // Texture Load 4 with scalar/non-vec4 source/destinations
EXIT,
IPA,
FFMA_IMM, // Fused Multiply and Add
@@ -724,10 +809,14 @@ private:
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
+ INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
+ INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
+ INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
+ INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 5a593c1f7..9758adcfd 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -55,6 +55,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
case RenderTargetFormat::RGBA8_UNORM:
case RenderTargetFormat::RGBA8_SNORM:
case RenderTargetFormat::RGBA8_SRGB:
+ case RenderTargetFormat::RGBA8_UINT:
case RenderTargetFormat::RGB10_A2_UNORM:
case RenderTargetFormat::BGRA8_UNORM:
case RenderTargetFormat::RG16_UNORM:
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 97dcccb92..2697e1c27 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -30,6 +30,7 @@ enum class RenderTargetFormat : u32 {
RGBA8_UNORM = 0xD5,
RGBA8_SRGB = 0xD6,
RGBA8_SNORM = 0xD7,
+ RGBA8_UINT = 0xD9,
RG16_UNORM = 0xDA,
RG16_SNORM = 0xDB,
RG16_SINT = 0xDC,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 93eadde7a..fe1f55e85 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -98,7 +98,8 @@ RasterizerOpenGL::~RasterizerOpenGL() {}
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
- const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+ const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+ const auto& regs = gpu.regs;
state.draw.vertex_array = hw_vao.handle;
state.draw.vertex_buffer = stream_buffer.GetHandle();
@@ -110,9 +111,13 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
if (!vertex_array.IsEnabled())
continue;
- const Tegra::GPUVAddr start = vertex_array.StartAddress();
+ Tegra::GPUVAddr start = vertex_array.StartAddress();
const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
+ if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
+ start += vertex_array.stride * (gpu.state.current_instance / vertex_array.divisor);
+ }
+
ASSERT(end > start);
u64 size = end - start + 1;
@@ -124,7 +129,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
vertex_array.stride);
- ASSERT_MSG(vertex_array.divisor == 0, "Instanced vertex arrays are not supported");
+ if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
+ // Tell OpenGL that this is an instanced vertex buffer so the attribute does not
+ // advance per vertex. We do the instance indexing manually by offsetting the
+ // start address of the vertex buffer.
+ glVertexBindingDivisor(index, 1);
+ } else {
+ // Disable the vertex buffer instancing.
+ glVertexBindingDivisor(index, 0);
+ }
}
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 38aa067b6..fb7476fb8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -94,6 +94,7 @@ struct FormatTuple {
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
{GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
false}, // A2B10G10R10U
@@ -245,6 +246,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
// clang-format off
MortonCopy<true, PixelFormat::ABGR8U>,
MortonCopy<true, PixelFormat::ABGR8S>,
+ MortonCopy<true, PixelFormat::ABGR8UI>,
MortonCopy<true, PixelFormat::B5G6R5U>,
MortonCopy<true, PixelFormat::A2B10G10R10U>,
MortonCopy<true, PixelFormat::A1B5G5R5U>,
@@ -299,6 +301,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
// clang-format off
MortonCopy<false, PixelFormat::ABGR8U>,
MortonCopy<false, PixelFormat::ABGR8S>,
+ MortonCopy<false, PixelFormat::ABGR8UI>,
MortonCopy<false, PixelFormat::B5G6R5U>,
MortonCopy<false, PixelFormat::A2B10G10R10U>,
MortonCopy<false, PixelFormat::A1B5G5R5U>,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index beec01746..fc8b44219 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -25,59 +25,60 @@ struct SurfaceParams {
enum class PixelFormat {
ABGR8U = 0,
ABGR8S = 1,
- B5G6R5U = 2,
- A2B10G10R10U = 3,
- A1B5G5R5U = 4,
- R8U = 5,
- R8UI = 6,
- RGBA16F = 7,
- RGBA16U = 8,
- RGBA16UI = 9,
- R11FG11FB10F = 10,
- RGBA32UI = 11,
- DXT1 = 12,
- DXT23 = 13,
- DXT45 = 14,
- DXN1 = 15, // This is also known as BC4
- DXN2UNORM = 16,
- DXN2SNORM = 17,
- BC7U = 18,
- ASTC_2D_4X4 = 19,
- G8R8U = 20,
- G8R8S = 21,
- BGRA8 = 22,
- RGBA32F = 23,
- RG32F = 24,
- R32F = 25,
- R16F = 26,
- R16U = 27,
- R16S = 28,
- R16UI = 29,
- R16I = 30,
- RG16 = 31,
- RG16F = 32,
- RG16UI = 33,
- RG16I = 34,
- RG16S = 35,
- RGB32F = 36,
- SRGBA8 = 37,
- RG8U = 38,
- RG8S = 39,
- RG32UI = 40,
- R32UI = 41,
+ ABGR8UI = 2,
+ B5G6R5U = 3,
+ A2B10G10R10U = 4,
+ A1B5G5R5U = 5,
+ R8U = 6,
+ R8UI = 7,
+ RGBA16F = 8,
+ RGBA16U = 9,
+ RGBA16UI = 10,
+ R11FG11FB10F = 11,
+ RGBA32UI = 12,
+ DXT1 = 13,
+ DXT23 = 14,
+ DXT45 = 15,
+ DXN1 = 16, // This is also known as BC4
+ DXN2UNORM = 17,
+ DXN2SNORM = 18,
+ BC7U = 19,
+ ASTC_2D_4X4 = 20,
+ G8R8U = 21,
+ G8R8S = 22,
+ BGRA8 = 23,
+ RGBA32F = 24,
+ RG32F = 25,
+ R32F = 26,
+ R16F = 27,
+ R16U = 28,
+ R16S = 29,
+ R16UI = 30,
+ R16I = 31,
+ RG16 = 32,
+ RG16F = 33,
+ RG16UI = 34,
+ RG16I = 35,
+ RG16S = 36,
+ RGB32F = 37,
+ SRGBA8 = 38,
+ RG8U = 39,
+ RG8S = 40,
+ RG32UI = 41,
+ R32UI = 42,
MaxColorFormat,
// Depth formats
- Z32F = 42,
- Z16 = 43,
+ Z32F = 43,
+ Z16 = 44,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 44,
- S8Z24 = 45,
- Z32FS8 = 46,
+ Z24S8 = 45,
+ S8Z24 = 46,
+ Z32FS8 = 47,
MaxDepthStencilFormat,
@@ -117,6 +118,7 @@ struct SurfaceParams {
constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
1, // ABGR8U
1, // ABGR8S
+ 1, // ABGR8UI
1, // B5G6R5U
1, // A2B10G10R10U
1, // A1B5G5R5U
@@ -175,6 +177,7 @@ struct SurfaceParams {
constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
32, // ABGR8U
32, // ABGR8S
+ 32, // ABGR8UI
16, // B5G6R5U
32, // A2B10G10R10U
16, // A1B5G5R5U
@@ -257,6 +260,8 @@ struct SurfaceParams {
return PixelFormat::ABGR8U;
case Tegra::RenderTargetFormat::RGBA8_SNORM:
return PixelFormat::ABGR8S;
+ case Tegra::RenderTargetFormat::RGBA8_UINT:
+ return PixelFormat::ABGR8UI;
case Tegra::RenderTargetFormat::BGRA8_UNORM:
return PixelFormat::BGRA8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
@@ -327,6 +332,8 @@ struct SurfaceParams {
return PixelFormat::ABGR8U;
case Tegra::Texture::ComponentType::SNORM:
return PixelFormat::ABGR8S;
+ case Tegra::Texture::ComponentType::UINT:
+ return PixelFormat::ABGR8UI;
}
LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
static_cast<u32>(component_type));
@@ -551,6 +558,7 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::R16_UINT:
case Tegra::RenderTargetFormat::RG32_UINT:
case Tegra::RenderTargetFormat::R32_UINT:
+ case Tegra::RenderTargetFormat::RGBA8_UINT:
return ComponentType::UInt;
case Tegra::RenderTargetFormat::RG16_SINT:
case Tegra::RenderTargetFormat::R16_SINT:
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bb01b3c27..ac6ccfec7 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -439,13 +439,12 @@ public:
}
declarations.AddNewLine();
- // Append the sampler2D array for the used textures.
- size_t num_samplers = GetSamplers().size();
- if (num_samplers > 0) {
- declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
- std::to_string(num_samplers) + "];");
- declarations.AddNewLine();
+ const auto& samplers = GetSamplers();
+ for (const auto& sampler : samplers) {
+ declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
+ ';');
}
+ declarations.AddNewLine();
}
/// Returns a list of constant buffer declarations
@@ -457,13 +456,14 @@ public:
}
/// Returns a list of samplers used in the shader
- std::vector<SamplerEntry> GetSamplers() const {
+ const std::vector<SamplerEntry>& GetSamplers() const {
return used_samplers;
}
/// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
/// necessary.
- std::string AccessSampler(const Sampler& sampler) {
+ std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
+ bool is_array) {
size_t offset = static_cast<size_t>(sampler.index.Value());
// If this sampler has already been used, return the existing mapping.
@@ -472,12 +472,13 @@ public:
[&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
if (itr != used_samplers.end()) {
+ ASSERT(itr->GetType() == type && itr->IsArray() == is_array);
return itr->GetName();
}
// Otherwise create a new mapping for this sampler
size_t next_index = used_samplers.size();
- SamplerEntry entry{stage, offset, next_index};
+ SamplerEntry entry{stage, offset, next_index, type, is_array};
used_samplers.emplace_back(entry);
return entry.GetName();
}
@@ -542,6 +543,10 @@ private:
// shader.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))";
+ case Attribute::Index::FrontFacing:
+ // TODO(Subv): Find out what the values are for the other elements.
+ ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
+ return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -634,8 +639,8 @@ private:
}
/// Generates code representing a texture sampler.
- std::string GetSampler(const Sampler& sampler) {
- return regs.AccessSampler(sampler);
+ std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) {
+ return regs.AccessSampler(sampler, type, is_array);
}
/**
@@ -743,6 +748,30 @@ private:
return op->second;
}
+ /**
+ * Transforms the input string GLSL operand into one that applies the abs() function and negates
+ * the output if necessary. When both abs and neg are true, the negation will be applied after
+ * taking the absolute value.
+ * @param operand The input operand to take the abs() of, negate, or both.
+ * @param abs Whether to apply the abs() function to the input operand.
+ * @param neg Whether to negate the input operand.
+ * @returns String corresponding to the operand after being transformed by the abs() and
+ * negation operations.
+ */
+ static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) {
+ std::string result = operand;
+
+ if (abs) {
+ result = "abs(" + result + ')';
+ }
+
+ if (neg) {
+ result = "-(" + result + ')';
+ }
+
+ return result;
+ }
+
/*
* Returns whether the instruction at the specified offset is a 'sched' instruction.
* Sched instructions always appear before a sequence of 3 instructions.
@@ -756,28 +785,51 @@ private:
}
void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
- const std::string& op_b) {
+ const std::string& op_b,
+ Tegra::Shader::PredicateResultMode predicate_mode,
+ Tegra::Shader::Pred predicate) {
+ std::string result{};
switch (logic_op) {
case LogicOperation::And: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+ result = '(' + op_a + " & " + op_b + ')';
break;
}
case LogicOperation::Or: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+ result = '(' + op_a + " | " + op_b + ')';
break;
}
case LogicOperation::Xor: {
- regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+ result = '(' + op_a + " ^ " + op_b + ')';
break;
}
case LogicOperation::PassB: {
- regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+ result = op_b;
break;
}
default:
LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
UNREACHABLE();
}
+
+ if (dest != Tegra::Shader::Register::ZeroIndex) {
+ regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+ }
+
+ using Tegra::Shader::PredicateResultMode;
+ // Write the predicate value depending on the predicate mode.
+ switch (predicate_mode) {
+ case PredicateResultMode::None:
+ // Do nothing.
+ return;
+ case PredicateResultMode::NotZero:
+ // Set the predicate to true if the result is not zero.
+ SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0");
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}",
+ static_cast<u32>(predicate_mode));
+ UNREACHABLE();
+ }
}
void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
@@ -788,29 +840,56 @@ private:
++shader.scope;
shader.AddLine(coord);
- // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA
- // goes into gpr28+0 and gpr28+1
- size_t texs_offset{};
-
- size_t src_elem{};
- for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
- size_t dest_elem{};
- for (unsigned elem = 0; elem < 2; ++elem) {
- if (!instr.texs.IsComponentEnabled(src_elem++)) {
- // Skip disabled components
- continue;
- }
- regs.SetRegisterToFloat(dest, elem + texs_offset, texture, 1, 4, false,
- dest_elem++);
+ // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
+ // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
+
+ size_t written_components = 0;
+ for (u32 component = 0; component < 4; ++component) {
+ if (!instr.texs.IsComponentEnabled(component)) {
+ continue;
}
- if (!instr.texs.HasTwoDestinations()) {
- // Skip the second destination
- break;
+ if (written_components < 2) {
+ // Write the first two swizzle components to gpr0 and gpr0+1
+ regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false,
+ written_components % 2);
+ } else {
+ ASSERT(instr.texs.HasTwoDestinations());
+ // Write the rest of the swizzle components to gpr28 and gpr28+1
+ regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false,
+ written_components % 2);
}
- texs_offset += 2;
+ ++written_components;
}
+
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
+ /*
+ * Emits code to push the input target address to the SSY address stack, incrementing the stack
+ * top.
+ */
+ void EmitPushToSSYStack(u32 target) {
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine("ssy_stack[ssy_stack_top] = " + std::to_string(target) + "u;");
+ shader.AddLine("ssy_stack_top++;");
+ --shader.scope;
+ shader.AddLine('}');
+ }
+
+ /*
+ * Emits code to pop an address from the SSY address stack, setting the jump address to the
+ * popped address and decrementing the stack top.
+ */
+ void EmitPopFromSSYStack() {
+ shader.AddLine('{');
+ ++shader.scope;
+ shader.AddLine("ssy_stack_top--;");
+ shader.AddLine("jmp_to = ssy_stack[ssy_stack_top];");
+ shader.AddLine("break;");
--shader.scope;
shader.AddLine('}');
}
@@ -859,13 +938,6 @@ private:
switch (opcode->GetType()) {
case OpCode::Type::Arithmetic: {
std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- if (instr.alu.abs_a) {
- op_a = "abs(" + op_a + ')';
- }
-
- if (instr.alu.negate_a) {
- op_a = "-(" + op_a + ')';
- }
std::string op_b;
@@ -880,17 +952,10 @@ private:
}
}
- if (instr.alu.abs_b) {
- op_b = "abs(" + op_b + ')';
- }
-
- if (instr.alu.negate_b) {
- op_b = "-(" + op_b + ')';
- }
-
switch (opcode->GetId()) {
case OpCode::Id::MOV_C:
case OpCode::Id::MOV_R: {
+ // MOV has neither 'abs' nor 'neg' bits.
regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
break;
}
@@ -898,6 +963,8 @@ private:
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
+ // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
+ op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
instr.alu.saturate_d);
break;
@@ -905,11 +972,14 @@ private:
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
+ op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
+ op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
instr.alu.saturate_d);
break;
}
case OpCode::Id::MUFU: {
+ op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
switch (instr.sub_op) {
case SubOp::Cos:
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
@@ -949,6 +1019,9 @@ private:
case OpCode::Id::FMNMX_C:
case OpCode::Id::FMNMX_R:
case OpCode::Id::FMNMX_IMM: {
+ op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
+ op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
std::string condition =
GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
std::string parameters = op_a + ',' + op_b;
@@ -962,7 +1035,7 @@ private:
case OpCode::Id::RRO_R:
case OpCode::Id::RRO_IMM: {
// Currently RRO is only implemented as a register move.
- // Usage of `abs_b` and `negate_b` here should also be correct.
+ op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1);
LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
break;
@@ -1099,7 +1172,9 @@ private:
if (instr.alu.lop32i.invert_b)
op_b = "~(" + op_b + ')';
- WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+ WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
+ Tegra::Shader::PredicateResultMode::None,
+ Tegra::Shader::Pred::UnusedIndex);
break;
}
default: {
@@ -1165,16 +1240,14 @@ private:
case OpCode::Id::LOP_C:
case OpCode::Id::LOP_R:
case OpCode::Id::LOP_IMM: {
- ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
- ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
-
if (instr.alu.lop.invert_a)
op_a = "~(" + op_a + ')';
if (instr.alu.lop.invert_b)
op_b = "~(" + op_b + ')';
- WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+ WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+ instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
break;
}
case OpCode::Id::IMNMX_C:
@@ -1239,8 +1312,6 @@ private:
break;
}
case OpCode::Type::Conversion: {
- ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
-
switch (opcode->GetId()) {
case OpCode::Id::I2I_R: {
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
@@ -1437,10 +1508,29 @@ private:
break;
}
case OpCode::Id::TEX: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented");
+ std::string coord{};
+
+ switch (instr.tex.texture_type) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ case Tegra::Shader::TextureType::Texture3D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ std::string z = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+ break;
+ }
+ default:
+ UNIMPLEMENTED();
+ }
+
+ const std::string sampler =
+ GetSampler(instr.sampler, instr.tex.texture_type, instr.tex.array);
// Add an extra scope and declare the texture coords inside to prevent
// overwriting them in case they are used as outputs of the texs instruction.
shader.AddLine("{");
@@ -1462,24 +1552,115 @@ private:
break;
}
case OpCode::Id::TEXS: {
- const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ std::string coord{};
+
+ switch (instr.texs.GetTextureType()) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (instr.texs.IsArrayTexture()) {
+ std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
+ } else {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ case Tegra::Shader::TextureType::TextureCube: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ std::string z = regs.GetRegisterAsFloat(instr.gpr20);
+ coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
+ break;
+ }
+ default:
+ UNIMPLEMENTED();
+ }
+ const std::string sampler = GetSampler(instr.sampler, instr.texs.GetTextureType(),
+ instr.texs.IsArrayTexture());
const std::string texture = "texture(" + sampler + ", coords)";
WriteTexsInstruction(instr, coord, texture);
break;
}
case OpCode::Id::TLDS: {
- const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string op_b = regs.GetRegisterAsInteger(instr.gpr20);
- const std::string sampler = GetSampler(instr.sampler);
- const std::string coord = "ivec2 coords = ivec2(" + op_a + ", " + op_b + ");";
+ ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D);
+ ASSERT(instr.tlds.IsArrayTexture() == false);
+ std::string coord{};
+
+ switch (instr.tlds.GetTextureType()) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ if (instr.tlds.IsArrayTexture()) {
+ UNIMPLEMENTED();
+ } else {
+ std::string x = regs.GetRegisterAsInteger(instr.gpr8);
+ std::string y = regs.GetRegisterAsInteger(instr.gpr20);
+ coord = "ivec2 coords = ivec2(" + x + ", " + y + ");";
+ }
+ break;
+ }
+ default:
+ UNIMPLEMENTED();
+ }
+ const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(),
+ instr.tlds.IsArrayTexture());
const std::string texture = "texelFetch(" + sampler + ", coords, 0)";
WriteTexsInstruction(instr, coord, texture);
break;
}
+ case OpCode::Id::TLD4: {
+ ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
+ ASSERT(instr.tld4.array == 0);
+ std::string coord{};
+
+ switch (instr.tld4.texture_type) {
+ case Tegra::Shader::TextureType::Texture2D: {
+ std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+ std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+ coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ break;
+ }
+ default:
+ UNIMPLEMENTED();
+ }
+
+ const std::string sampler =
+ GetSampler(instr.sampler, instr.tld4.texture_type, instr.tld4.array);
+ // Add an extra scope and declare the texture coords inside to prevent
+ // overwriting them in case they are used as outputs of the texs instruction.
+ shader.AddLine("{");
+ ++shader.scope;
+ shader.AddLine(coord);
+ const std::string texture = "textureGather(" + sampler + ", coords, " +
+ std::to_string(instr.tld4.component) + ')';
+
+ size_t dest_elem{};
+ for (size_t elem = 0; elem < 4; ++elem) {
+ if (!instr.tex.IsComponentEnabled(elem)) {
+ // Skip disabled components
+ continue;
+ }
+ regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
+ ++dest_elem;
+ }
+ --shader.scope;
+ shader.AddLine("}");
+ break;
+ }
+ case OpCode::Id::TLD4S: {
+ const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+ const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+ // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+ const std::string sampler =
+ GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
+ const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+ const std::string texture = "textureGather(" + sampler + ", coords, " +
+ std::to_string(instr.tld4s.component) + ')';
+ WriteTexsInstruction(instr, coord, texture);
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
UNREACHABLE();
@@ -1843,13 +2024,13 @@ private:
ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
u32 target = offset + instr.bra.GetBranchTarget();
- shader.AddLine("ssy_target = " + std::to_string(target) + "u;");
+ EmitPushToSSYStack(target);
break;
}
case OpCode::Id::SYNC: {
// The SYNC opcode jumps to the address previously set by the SSY opcode
ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
- shader.AddLine("{ jmp_to = ssy_target; break; }");
+ EmitPopFromSSYStack();
break;
}
case OpCode::Id::DEPBAR: {
@@ -1920,7 +2101,13 @@ private:
} else {
labels.insert(subroutine.begin);
shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
- shader.AddLine("uint ssy_target = 0u;");
+
+ // TODO(Subv): Figure out the actual depth of the SSY stack. For now it seems
+ // unlikely that shaders will use more than 20 nested SSYs.
+ constexpr u32 SSY_STACK_SIZE = 20;
+ shader.AddLine("uint ssy_stack[" + std::to_string(SSY_STACK_SIZE) + "];");
+ shader.AddLine("uint ssy_stack_top = 0u;");
+
shader.AddLine("while (true) {");
++shader.scope;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 4729ce0fc..db48da645 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -11,6 +11,7 @@
#include <vector>
#include "common/common_types.h"
#include "common/hash.h"
+#include "video_core/engines/shader_bytecode.h"
namespace GLShader {
@@ -72,8 +73,9 @@ class SamplerEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
- SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
- : offset(offset), stage(stage), sampler_index(index) {}
+ SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index,
+ Tegra::Shader::TextureType type, bool is_array)
+ : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
size_t GetOffset() const {
return offset;
@@ -88,8 +90,41 @@ public:
}
std::string GetName() const {
- return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
- std::to_string(sampler_index) + ']';
+ return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' +
+ std::to_string(sampler_index);
+ }
+
+ std::string GetTypeString() const {
+ using Tegra::Shader::TextureType;
+ std::string glsl_type;
+
+ switch (type) {
+ case TextureType::Texture1D:
+ glsl_type = "sampler1D";
+ break;
+ case TextureType::Texture2D:
+ glsl_type = "sampler2D";
+ break;
+ case TextureType::Texture3D:
+ glsl_type = "sampler3D";
+ break;
+ case TextureType::TextureCube:
+ glsl_type = "samplerCube";
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ if (is_array)
+ glsl_type += "Array";
+ return glsl_type;
+ }
+
+ Tegra::Shader::TextureType GetType() const {
+ return type;
+ }
+
+ bool IsArray() const {
+ return is_array;
}
static std::string GetArrayName(Maxwell::ShaderStage stage) {
@@ -100,11 +135,14 @@ private:
static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
"tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
};
+
/// Offset in TSC memory from which to read the sampler object, as specified by the sampling
/// instruction.
size_t offset;
- Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
- size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
+ Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
+ size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
+ Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
+ bool is_array; ///< Whether the texture is being sampled as an array texture or not.
};
struct ShaderEntries {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 8f719fdd8..5d91a0c2f 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -147,6 +147,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
// GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
// manually mix them. However the shader part of this is not yet implemented.
return GL_CLAMP_TO_BORDER;
+ case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
+ return GL_MIRROR_CLAMP_TO_EDGE;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
UNREACHABLE();