diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 15 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 111 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 1 | ||||
-rw-r--r-- | src/video_core/gpu.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 19 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 98 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 331 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 50 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 2 |
10 files changed, 493 insertions, 138 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 771eb5abc..3c869d3a1 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -679,7 +679,19 @@ public: INSERT_PADDING_WORDS(0x7); - INSERT_PADDING_WORDS(0x46); + INSERT_PADDING_WORDS(0x20); + + struct { + u32 is_instanced[NumVertexArrays]; + + /// Returns whether the vertex array specified by index is supposed to be + /// accessed per instance or not. + bool IsInstancingEnabled(u32 index) const { + return is_instanced[index]; + } + } instanced_arrays; + + INSERT_PADDING_WORDS(0x6); Cull cull; @@ -928,6 +940,7 @@ ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(index_array, 0x5F2); +ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(clear_buffers, 0x674); ASSERT_REG_POSITION(query, 0x6C0); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index b038a9d92..3ba6fe614 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -12,6 +12,7 @@ #include <boost/optional.hpp> +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" @@ -79,6 +80,9 @@ union Attribute { // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval // shader. TessCoordInstanceIDVertexID = 47, + // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment + // shader. It is unknown what the other values contain. + FrontFacing = 63, }; union { @@ -214,6 +218,18 @@ enum class FlowCondition : u64 { Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? }; +enum class PredicateResultMode : u64 { + None = 0x0, + NotZero = 0x3, +}; + +enum class TextureType : u64 { + Texture1D = 0, + Texture2D = 1, + Texture3D = 2, + TextureCube = 3, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -254,7 +270,7 @@ union Instruction { BitField<39, 1, u64> invert_a; BitField<40, 1, u64> invert_b; BitField<41, 2, LogicOperation> operation; - BitField<44, 2, u64> unk44; + BitField<44, 2, PredicateResultMode> pred_result_mode; BitField<48, 3, Pred> pred48; } lop; @@ -284,6 +300,10 @@ union Instruction { } alu; union { + BitField<48, 1, u64> negate_b; + } fmul; + + union { BitField<48, 1, u64> is_signed; } shift; @@ -421,6 +441,8 @@ union Instruction { } conversion; union { + BitField<28, 1, u64> array; + BitField<29, 2, TextureType> texture_type; BitField<31, 4, u64> component_mask; bool IsComponentEnabled(size_t component) const { @@ -429,29 +451,88 @@ union Instruction { } tex; union { - BitField<50, 3, u64> component_mask_selector; + BitField<28, 1, u64> array; + BitField<29, 2, TextureType> texture_type; + BitField<56, 2, u64> component; + } tld4; + + union { + BitField<52, 2, u64> component; + } tld4s; + + union { BitField<0, 8, Register> gpr0; BitField<28, 8, Register> gpr28; + BitField<50, 3, u64> component_mask_selector; + BitField<53, 4, u64> texture_info; + + TextureType GetTextureType() const { + // The TEXS instruction has a weird encoding for the texture type. + if (texture_info == 0) + return TextureType::Texture1D; + if (texture_info >= 1 && texture_info <= 9) + return TextureType::Texture2D; + if (texture_info >= 10 && texture_info <= 11) + return TextureType::Texture3D; + if (texture_info >= 12 && texture_info <= 13) + return TextureType::TextureCube; + + UNIMPLEMENTED(); + } + + bool IsArrayTexture() const { + // TEXS only supports Texture2D arrays. + return texture_info >= 7 && texture_info <= 9; + } bool HasTwoDestinations() const { return gpr28.Value() != Register::ZeroIndex; } bool IsComponentEnabled(size_t component) const { - static constexpr std::array<std::array<u32, 8>, 4> mask_lut{ - {{}, - {0x1, 0x2, 0x4, 0x8, 0x3}, - {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, - {0x7, 0xb, 0xd, 0xe, 0xf}}}; + static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ + {}, + {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, + {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, + {0x7, 0xb, 0xd, 0xe, 0xf}, + }}; size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; - return ((1ull << component) & mask_lut[index][component_mask_selector]) != 0; + u32 mask = mask_lut[index][component_mask_selector]; + // A mask of 0 means this instruction uses an unimplemented mask. + ASSERT(mask != 0); + return ((1ull << component) & mask) != 0; } } texs; union { + BitField<53, 4, u64> texture_info; + + TextureType GetTextureType() const { + // The TLDS instruction has a weird encoding for the texture type. + if (texture_info >= 0 && texture_info <= 1) { + return TextureType::Texture1D; + } + if (texture_info == 2 || texture_info == 8 || texture_info == 12 || + texture_info >= 4 && texture_info <= 6) { + return TextureType::Texture2D; + } + if (texture_info == 7) { + return TextureType::Texture3D; + } + + UNIMPLEMENTED(); + } + + bool IsArrayTexture() const { + // TEXS only supports Texture2D arrays. + return texture_info == 8; + } + } tlds; + + union { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; @@ -513,10 +594,14 @@ public: LD_A, LD_C, ST_A, + LDG, // Load from global memory + STG, // Store in global memory TEX, - TEXQ, // Texture Query - TEXS, // Texture Fetch with scalar/non-vec4 source/destinations - TLDS, // Texture Load with scalar/non-vec4 source/destinations + TEXQ, // Texture Query + TEXS, // Texture Fetch with scalar/non-vec4 source/destinations + TLDS, // Texture Load with scalar/non-vec4 source/destinations + TLD4, // Texture Load 4 + TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations EXIT, IPA, FFMA_IMM, // Fused Multiply and Add @@ -724,10 +809,14 @@ private: INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), + INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), + INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), + INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), + INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 5a593c1f7..9758adcfd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -55,6 +55,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RGBA8_UNORM: case RenderTargetFormat::RGBA8_SNORM: case RenderTargetFormat::RGBA8_SRGB: + case RenderTargetFormat::RGBA8_UINT: case RenderTargetFormat::RGB10_A2_UNORM: case RenderTargetFormat::BGRA8_UNORM: case RenderTargetFormat::RG16_UNORM: diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 97dcccb92..2697e1c27 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -30,6 +30,7 @@ enum class RenderTargetFormat : u32 { RGBA8_UNORM = 0xD5, RGBA8_SRGB = 0xD6, RGBA8_SNORM = 0xD7, + RGBA8_UINT = 0xD9, RG16_UNORM = 0xDA, RG16_SNORM = 0xDB, RG16_SINT = 0xDC, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 93eadde7a..fe1f55e85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -98,7 +98,8 @@ RasterizerOpenGL::~RasterizerOpenGL() {} std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); - const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); + const auto& regs = gpu.regs; state.draw.vertex_array = hw_vao.handle; state.draw.vertex_buffer = stream_buffer.GetHandle(); @@ -110,9 +111,13 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, if (!vertex_array.IsEnabled()) continue; - const Tegra::GPUVAddr start = vertex_array.StartAddress(); + Tegra::GPUVAddr start = vertex_array.StartAddress(); const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); + if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { + start += vertex_array.stride * (gpu.state.current_instance / vertex_array.divisor); + } + ASSERT(end > start); u64 size = end - start + 1; @@ -124,7 +129,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, vertex_array.stride); - ASSERT_MSG(vertex_array.divisor == 0, "Instanced vertex arrays are not supported"); + if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { + // Tell OpenGL that this is an instanced vertex buffer to prevent accessing different + // indexes on each vertex. We do the instance indexing manually by incrementing the + // start address of the vertex buffer. + glVertexBindingDivisor(index, 1); + } else { + // Disable the vertex buffer instancing. + glVertexBindingDivisor(index, 0); + } } // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 38aa067b6..fb7476fb8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -94,6 +94,7 @@ struct FormatTuple { static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, false}, // A2B10G10R10U @@ -245,6 +246,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU // clang-format off MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, + MortonCopy<true, PixelFormat::ABGR8UI>, MortonCopy<true, PixelFormat::B5G6R5U>, MortonCopy<true, PixelFormat::A2B10G10R10U>, MortonCopy<true, PixelFormat::A1B5G5R5U>, @@ -299,6 +301,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU // clang-format off MortonCopy<false, PixelFormat::ABGR8U>, MortonCopy<false, PixelFormat::ABGR8S>, + MortonCopy<false, PixelFormat::ABGR8UI>, MortonCopy<false, PixelFormat::B5G6R5U>, MortonCopy<false, PixelFormat::A2B10G10R10U>, MortonCopy<false, PixelFormat::A1B5G5R5U>, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index beec01746..fc8b44219 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -25,59 +25,60 @@ struct SurfaceParams { enum class PixelFormat { ABGR8U = 0, ABGR8S = 1, - B5G6R5U = 2, - A2B10G10R10U = 3, - A1B5G5R5U = 4, - R8U = 5, - R8UI = 6, - RGBA16F = 7, - RGBA16U = 8, - RGBA16UI = 9, - R11FG11FB10F = 10, - RGBA32UI = 11, - DXT1 = 12, - DXT23 = 13, - DXT45 = 14, - DXN1 = 15, // This is also known as BC4 - DXN2UNORM = 16, - DXN2SNORM = 17, - BC7U = 18, - ASTC_2D_4X4 = 19, - G8R8U = 20, - G8R8S = 21, - BGRA8 = 22, - RGBA32F = 23, - RG32F = 24, - R32F = 25, - R16F = 26, - R16U = 27, - R16S = 28, - R16UI = 29, - R16I = 30, - RG16 = 31, - RG16F = 32, - RG16UI = 33, - RG16I = 34, - RG16S = 35, - RGB32F = 36, - SRGBA8 = 37, - RG8U = 38, - RG8S = 39, - RG32UI = 40, - R32UI = 41, + ABGR8UI = 2, + B5G6R5U = 3, + A2B10G10R10U = 4, + A1B5G5R5U = 5, + R8U = 6, + R8UI = 7, + RGBA16F = 8, + RGBA16U = 9, + RGBA16UI = 10, + R11FG11FB10F = 11, + RGBA32UI = 12, + DXT1 = 13, + DXT23 = 14, + DXT45 = 15, + DXN1 = 16, // This is also known as BC4 + DXN2UNORM = 17, + DXN2SNORM = 18, + BC7U = 19, + ASTC_2D_4X4 = 20, + G8R8U = 21, + G8R8S = 22, + BGRA8 = 23, + RGBA32F = 24, + RG32F = 25, + R32F = 26, + R16F = 27, + R16U = 28, + R16S = 29, + R16UI = 30, + R16I = 31, + RG16 = 32, + RG16F = 33, + RG16UI = 34, + RG16I = 35, + RG16S = 36, + RGB32F = 37, + SRGBA8 = 38, + RG8U = 39, + RG8S = 40, + RG32UI = 41, + R32UI = 42, MaxColorFormat, // Depth formats - Z32F = 42, - Z16 = 43, + Z32F = 43, + Z16 = 44, MaxDepthFormat, // DepthStencil formats - Z24S8 = 44, - S8Z24 = 45, - Z32FS8 = 46, + Z24S8 = 45, + S8Z24 = 46, + Z32FS8 = 47, MaxDepthStencilFormat, @@ -117,6 +118,7 @@ struct SurfaceParams { constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ 1, // ABGR8U 1, // ABGR8S + 1, // ABGR8UI 1, // B5G6R5U 1, // A2B10G10R10U 1, // A1B5G5R5U @@ -175,6 +177,7 @@ struct SurfaceParams { constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 32, // ABGR8U 32, // ABGR8S + 32, // ABGR8UI 16, // B5G6R5U 32, // A2B10G10R10U 16, // A1B5G5R5U @@ -257,6 +260,8 @@ struct SurfaceParams { return PixelFormat::ABGR8U; case Tegra::RenderTargetFormat::RGBA8_SNORM: return PixelFormat::ABGR8S; + case Tegra::RenderTargetFormat::RGBA8_UINT: + return PixelFormat::ABGR8UI; case Tegra::RenderTargetFormat::BGRA8_UNORM: return PixelFormat::BGRA8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: @@ -327,6 +332,8 @@ struct SurfaceParams { return PixelFormat::ABGR8U; case Tegra::Texture::ComponentType::SNORM: return PixelFormat::ABGR8S; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::ABGR8UI; } LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); @@ -551,6 +558,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R16_UINT: case Tegra::RenderTargetFormat::RG32_UINT: case Tegra::RenderTargetFormat::R32_UINT: + case Tegra::RenderTargetFormat::RGBA8_UINT: return ComponentType::UInt; case Tegra::RenderTargetFormat::RG16_SINT: case Tegra::RenderTargetFormat::R16_SINT: diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bb01b3c27..ac6ccfec7 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -439,13 +439,12 @@ public: } declarations.AddNewLine(); - // Append the sampler2D array for the used textures. - size_t num_samplers = GetSamplers().size(); - if (num_samplers > 0) { - declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' + - std::to_string(num_samplers) + "];"); - declarations.AddNewLine(); + const auto& samplers = GetSamplers(); + for (const auto& sampler : samplers) { + declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() + + ';'); } + declarations.AddNewLine(); } /// Returns a list of constant buffer declarations @@ -457,13 +456,14 @@ public: } /// Returns a list of samplers used in the shader - std::vector<SamplerEntry> GetSamplers() const { + const std::vector<SamplerEntry>& GetSamplers() const { return used_samplers; } /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if /// necessary. - std::string AccessSampler(const Sampler& sampler) { + std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, + bool is_array) { size_t offset = static_cast<size_t>(sampler.index.Value()); // If this sampler has already been used, return the existing mapping. @@ -472,12 +472,13 @@ public: [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; }); if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array); return itr->GetName(); } // Otherwise create a new mapping for this sampler size_t next_index = used_samplers.size(); - SamplerEntry entry{stage, offset, next_index}; + SamplerEntry entry{stage, offset, next_index, type, is_array}; used_samplers.emplace_back(entry); return entry.GetName(); } @@ -542,6 +543,10 @@ private: // shader. ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); return "vec4(0, 0, uintBitsToFloat(instance_id.x), uintBitsToFloat(gl_VertexID))"; + case Attribute::Index::FrontFacing: + // TODO(Subv): Find out what the values are for the other elements. + ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); + return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))"; default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; @@ -634,8 +639,8 @@ private: } /// Generates code representing a texture sampler. - std::string GetSampler(const Sampler& sampler) { - return regs.AccessSampler(sampler); + std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) { + return regs.AccessSampler(sampler, type, is_array); } /** @@ -743,6 +748,30 @@ private: return op->second; } + /** + * Transforms the input string GLSL operand into one that applies the abs() function and negates + * the output if necessary. When both abs and neg are true, the negation will be applied after + * taking the absolute value. + * @param operand The input operand to take the abs() of, negate, or both. + * @param abs Whether to apply the abs() function to the input operand. + * @param neg Whether to negate the input operand. + * @returns String corresponding to the operand after being transformed by the abs() and + * negation operations. + */ + static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) { + std::string result = operand; + + if (abs) { + result = "abs(" + result + ')'; + } + + if (neg) { + result = "-(" + result + ')'; + } + + return result; + } + /* * Returns whether the instruction at the specified offset is a 'sched' instruction. * Sched instructions always appear before a sequence of 3 instructions. @@ -756,28 +785,51 @@ private: } void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, - const std::string& op_b) { + const std::string& op_b, + Tegra::Shader::PredicateResultMode predicate_mode, + Tegra::Shader::Pred predicate) { + std::string result{}; switch (logic_op) { case LogicOperation::And: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1); + result = '(' + op_a + " & " + op_b + ')'; break; } case LogicOperation::Or: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1); + result = '(' + op_a + " | " + op_b + ')'; break; } case LogicOperation::Xor: { - regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1); + result = '(' + op_a + " ^ " + op_b + ')'; break; } case LogicOperation::PassB: { - regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1); + result = op_b; break; } default: LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op)); UNREACHABLE(); } + + if (dest != Tegra::Shader::Register::ZeroIndex) { + regs.SetRegisterToInteger(dest, true, 0, result, 1, 1); + } + + using Tegra::Shader::PredicateResultMode; + // Write the predicate value depending on the predicate mode. + switch (predicate_mode) { + case PredicateResultMode::None: + // Do nothing. + return; + case PredicateResultMode::NotZero: + // Set the predicate to true if the result is not zero. + SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0"); + break; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented predicate result mode: {}", + static_cast<u32>(predicate_mode)); + UNREACHABLE(); + } } void WriteTexsInstruction(const Instruction& instr, const std::string& coord, @@ -788,29 +840,56 @@ private: ++shader.scope; shader.AddLine(coord); - // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA - // goes into gpr28+0 and gpr28+1 - size_t texs_offset{}; - - size_t src_elem{}; - for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) { - size_t dest_elem{}; - for (unsigned elem = 0; elem < 2; ++elem) { - if (!instr.texs.IsComponentEnabled(src_elem++)) { - // Skip disabled components - continue; - } - regs.SetRegisterToFloat(dest, elem + texs_offset, texture, 1, 4, false, - dest_elem++); + // TEXS has two destination registers and a swizzle. The first two elements in the swizzle + // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 + + size_t written_components = 0; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) { + continue; } - if (!instr.texs.HasTwoDestinations()) { - // Skip the second destination - break; + if (written_components < 2) { + // Write the first two swizzle components to gpr0 and gpr0+1 + regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, + written_components % 2); + } else { + ASSERT(instr.texs.HasTwoDestinations()); + // Write the rest of the swizzle components to gpr28 and gpr28+1 + regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, + written_components % 2); } - texs_offset += 2; + ++written_components; } + + --shader.scope; + shader.AddLine('}'); + } + + /* + * Emits code to push the input target address to the SSY address stack, incrementing the stack + * top. + */ + void EmitPushToSSYStack(u32 target) { + shader.AddLine('{'); + ++shader.scope; + shader.AddLine("ssy_stack[ssy_stack_top] = " + std::to_string(target) + "u;"); + shader.AddLine("ssy_stack_top++;"); + --shader.scope; + shader.AddLine('}'); + } + + /* + * Emits code to pop an address from the SSY address stack, setting the jump address to the + * popped address and decrementing the stack top. + */ + void EmitPopFromSSYStack() { + shader.AddLine('{'); + ++shader.scope; + shader.AddLine("ssy_stack_top--;"); + shader.AddLine("jmp_to = ssy_stack[ssy_stack_top];"); + shader.AddLine("break;"); --shader.scope; shader.AddLine('}'); } @@ -859,13 +938,6 @@ private: switch (opcode->GetType()) { case OpCode::Type::Arithmetic: { std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - if (instr.alu.abs_a) { - op_a = "abs(" + op_a + ')'; - } - - if (instr.alu.negate_a) { - op_a = "-(" + op_a + ')'; - } std::string op_b; @@ -880,17 +952,10 @@ private: } } - if (instr.alu.abs_b) { - op_b = "abs(" + op_b + ')'; - } - - if (instr.alu.negate_b) { - op_b = "-(" + op_b + ')'; - } - switch (opcode->GetId()) { case OpCode::Id::MOV_C: case OpCode::Id::MOV_R: { + // MOV does not have neither 'abs' nor 'neg' bits. regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); break; } @@ -898,6 +963,8 @@ private: case OpCode::Id::FMUL_C: case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { + // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. + op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.saturate_d); break; @@ -905,11 +972,14 @@ private: case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { + op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); + op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.saturate_d); break; } case OpCode::Id::MUFU: { + op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); switch (instr.sub_op) { case SubOp::Cos: regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, @@ -949,6 +1019,9 @@ private: case OpCode::Id::FMNMX_C: case OpCode::Id::FMNMX_R: case OpCode::Id::FMNMX_IMM: { + op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); + op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); + std::string condition = GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); std::string parameters = op_a + ',' + op_b; @@ -962,7 +1035,7 @@ private: case OpCode::Id::RRO_R: case OpCode::Id::RRO_IMM: { // Currently RRO is only implemented as a register move. - // Usage of `abs_b` and `negate_b` here should also be correct. + op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; @@ -1099,7 +1172,9 @@ private: if (instr.alu.lop32i.invert_b) op_b = "~(" + op_b + ')'; - WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b); + WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, + Tegra::Shader::PredicateResultMode::None, + Tegra::Shader::Pred::UnusedIndex); break; } default: { @@ -1165,16 +1240,14 @@ private: case OpCode::Id::LOP_C: case OpCode::Id::LOP_R: case OpCode::Id::LOP_IMM: { - ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented"); - ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented"); - if (instr.alu.lop.invert_a) op_a = "~(" + op_a + ')'; if (instr.alu.lop.invert_b) op_b = "~(" + op_b + ')'; - WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b); + WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, + instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); break; } case OpCode::Id::IMNMX_C: @@ -1239,8 +1312,6 @@ private: break; } case OpCode::Type::Conversion: { - ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); - switch (opcode->GetId()) { case OpCode::Id::I2I_R: { ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); @@ -1437,10 +1508,29 @@ private: break; } case OpCode::Id::TEX: { - const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - const std::string sampler = GetSampler(instr.sampler); - const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented"); + std::string coord{}; + + switch (instr.tex.texture_type) { + case Tegra::Shader::TextureType::Texture2D: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + break; + } + case Tegra::Shader::TextureType::Texture3D: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + std::string z = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + break; + } + default: + UNIMPLEMENTED(); + } + + const std::string sampler = + GetSampler(instr.sampler, instr.tex.texture_type, instr.tex.array); // Add an extra scope and declare the texture coords inside to prevent // overwriting them in case they are used as outputs of the texs instruction. shader.AddLine("{"); @@ -1462,24 +1552,115 @@ private: break; } case OpCode::Id::TEXS: { - const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); - const std::string sampler = GetSampler(instr.sampler); - const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + std::string coord{}; + + switch (instr.texs.GetTextureType()) { + case Tegra::Shader::TextureType::Texture2D: { + if (instr.texs.IsArrayTexture()) { + std::string index = regs.GetRegisterAsInteger(instr.gpr8); + std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + std::string y = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; + } else { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + } + break; + } + case Tegra::Shader::TextureType::TextureCube: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + std::string z = regs.GetRegisterAsFloat(instr.gpr20); + coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; + break; + } + default: + UNIMPLEMENTED(); + } + const std::string sampler = GetSampler(instr.sampler, instr.texs.GetTextureType(), + instr.texs.IsArrayTexture()); const std::string texture = "texture(" + sampler + ", coords)"; WriteTexsInstruction(instr, coord, texture); break; } case OpCode::Id::TLDS: { - const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); - const std::string op_b = regs.GetRegisterAsInteger(instr.gpr20); - const std::string sampler = GetSampler(instr.sampler); - const std::string coord = "ivec2 coords = ivec2(" + op_a + ", " + op_b + ");"; + ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D); + ASSERT(instr.tlds.IsArrayTexture() == false); + std::string coord{}; + + switch (instr.tlds.GetTextureType()) { + case Tegra::Shader::TextureType::Texture2D: { + if (instr.tlds.IsArrayTexture()) { + UNIMPLEMENTED(); + } else { + std::string x = regs.GetRegisterAsInteger(instr.gpr8); + std::string y = regs.GetRegisterAsInteger(instr.gpr20); + coord = "ivec2 coords = ivec2(" + x + ", " + y + ");"; + } + break; + } + default: + UNIMPLEMENTED(); + } + const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(), + instr.tlds.IsArrayTexture()); const std::string texture = "texelFetch(" + sampler + ", coords, 0)"; WriteTexsInstruction(instr, coord, texture); break; } + case OpCode::Id::TLD4: { + ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D); + ASSERT(instr.tld4.array == 0); + std::string coord{}; + + switch (instr.tld4.texture_type) { + case Tegra::Shader::TextureType::Texture2D: { + std::string x = regs.GetRegisterAsFloat(instr.gpr8); + std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + coord = "vec2 coords = vec2(" + x + ", " + y + ");"; + break; + } + default: + UNIMPLEMENTED(); + } + + const std::string sampler = + GetSampler(instr.sampler, instr.tld4.texture_type, instr.tld4.array); + // Add an extra scope and declare the texture coords inside to prevent + // overwriting them in case they are used as outputs of the texs instruction. + shader.AddLine("{"); + ++shader.scope; + shader.AddLine(coord); + const std::string texture = "textureGather(" + sampler + ", coords, " + + std::to_string(instr.tld4.component) + ')'; + + size_t dest_elem{}; + for (size_t elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); + ++dest_elem; + } + --shader.scope; + shader.AddLine("}"); + break; + } + case OpCode::Id::TLD4S: { + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. + const std::string sampler = + GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + const std::string texture = "textureGather(" + sampler + ", coords, " + + std::to_string(instr.tld4s.component) + ')'; + WriteTexsInstruction(instr, coord, texture); + break; + } default: { LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName()); UNREACHABLE(); @@ -1843,13 +2024,13 @@ private: ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); u32 target = offset + instr.bra.GetBranchTarget(); - shader.AddLine("ssy_target = " + std::to_string(target) + "u;"); + EmitPushToSSYStack(target); break; } case OpCode::Id::SYNC: { // The SYNC opcode jumps to the address previously set by the SSY opcode ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); - shader.AddLine("{ jmp_to = ssy_target; break; }"); + EmitPopFromSSYStack(); break; } case OpCode::Id::DEPBAR: { @@ -1920,7 +2101,13 @@ private: } else { labels.insert(subroutine.begin); shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); - shader.AddLine("uint ssy_target = 0u;"); + + // TODO(Subv): Figure out the actual depth of the SSY stack, for now it seems + // unlikely that shaders will use 20 nested SSYs. + constexpr u32 SSY_STACK_SIZE = 20; + shader.AddLine("uint ssy_stack[" + std::to_string(SSY_STACK_SIZE) + "];"); + shader.AddLine("uint ssy_stack_top = 0u;"); + shader.AddLine("while (true) {"); ++shader.scope; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 4729ce0fc..db48da645 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -11,6 +11,7 @@ #include <vector> #include "common/common_types.h" #include "common/hash.h" +#include "video_core/engines/shader_bytecode.h" namespace GLShader { @@ -72,8 +73,9 @@ class SamplerEntry { using Maxwell = Tegra::Engines::Maxwell3D::Regs; public: - SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index) - : offset(offset), stage(stage), sampler_index(index) {} + SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index, + Tegra::Shader::TextureType type, bool is_array) + : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} size_t GetOffset() const { return offset; @@ -88,8 +90,41 @@ public: } std::string GetName() const { - return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' + - std::to_string(sampler_index) + ']'; + return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' + + std::to_string(sampler_index); + } + + std::string GetTypeString() const { + using Tegra::Shader::TextureType; + std::string glsl_type; + + switch (type) { + case TextureType::Texture1D: + glsl_type = "sampler1D"; + break; + case TextureType::Texture2D: + glsl_type = "sampler2D"; + break; + case TextureType::Texture3D: + glsl_type = "sampler3D"; + break; + case TextureType::TextureCube: + glsl_type = "samplerCube"; + break; + default: + UNIMPLEMENTED(); + } + if (is_array) + glsl_type += "Array"; + return glsl_type; + } + + Tegra::Shader::TextureType GetType() const { + return type; + } + + bool IsArray() const { + return is_array; } static std::string GetArrayName(Maxwell::ShaderStage stage) { @@ -100,11 +135,14 @@ private: static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = { "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs", }; + /// Offset in TSC memory from which to read the sampler object, as specified by the sampling /// instruction. size_t offset; - Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. - size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. + Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. + size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. + Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) + bool is_array; ///< Whether the texture is being sampled as an array texture or not. }; struct ShaderEntries { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 8f719fdd8..5d91a0c2f 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -147,6 +147,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to // manually mix them. However the shader part of this is not yet implemented. return GL_CLAMP_TO_BORDER; + case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: + return GL_MIRROR_CLAMP_TO_EDGE; } LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); UNREACHABLE(); |