summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2124
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp90
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h59
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.cpp209
-rw-r--r--src/video_core/renderer_opengl/gl_compute_pipeline.h93
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp146
-rw-r--r--src/video_core/renderer_opengl/gl_device.h84
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.cpp572
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_pipeline.h169
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp481
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h61
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp27
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp989
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h172
-rw-r--r--src/video_core/renderer_opengl/gl_shader_context.h33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2986
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h69
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp482
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h176
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp146
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h185
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp123
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h89
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp364
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h53
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h108
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp72
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h6
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp27
33 files changed, 2479 insertions, 7766 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
deleted file mode 100644
index e8d8d2aa5..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ /dev/null
@@ -1,2124 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include <cstddef>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
-// GLASM lacks booleans, so they have to be implemented as integers.
-// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
-// select between two values, because -1 will be evaluated as true and 0 as false.
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using namespace VideoCommon::Shader;
-using Operation = const OperationNode&;
-
-constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
-
-char Swizzle(std::size_t component) {
- static constexpr std::string_view SWIZZLE{"xyzw"};
- return SWIZZLE.at(component);
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-std::string_view Modifiers(Operation operation) {
- const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
- if (meta && meta->precise) {
- return ".PREC";
- }
- return "";
-}
-
-std::string_view GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "";
- case PixelImap::Constant:
- return "FLAT ";
- case PixelImap::ScreenLinear:
- return "NOPERSPECTIVE ";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
-}
-
-std::string_view ImageType(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "BUFFER";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "ARRAY1D";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "ARRAY2D";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- }
- UNREACHABLE();
- return {};
-}
-
-std::string_view StackName(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "SSY";
- case MetaStackClass::Pbk:
- return "PBK";
- }
- UNREACHABLE();
- return "";
-};
-
-std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
- switch (topology) {
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
- return "POINTS";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
- return "LINES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
- return "LINES_ADJACENCY";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
- return "TRIANGLES";
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
- return "TRIANGLES_ADJACENCY";
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return "POINTS";
- }
-}
-
-std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "POINTS";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "LINE_STRIP";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "TRIANGLE_STRIP";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-std::string_view StageInputName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- case ShaderType::Geometry:
- return "vertex";
- case ShaderType::Fragment:
- return "fragment";
- case ShaderType::Compute:
- return "invocation";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-std::string TextureType(const MetaTexture& meta) {
- if (meta.sampler.is_buffer) {
- return "BUFFER";
- }
- std::string type;
- if (meta.sampler.is_shadow) {
- type += "SHADOW";
- }
- if (meta.sampler.is_array) {
- type += "ARRAY";
- }
- type += [&meta] {
- switch (meta.sampler.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return "1D";
- case Tegra::Shader::TextureType::Texture2D:
- return "2D";
- case Tegra::Shader::TextureType::Texture3D:
- return "3D";
- case Tegra::Shader::TextureType::TextureCube:
- return "CUBE";
- }
- UNREACHABLE();
- return "2D";
- }();
- return type;
-}
-
-class ARBDecompiler final {
-public:
- explicit ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier);
-
- std::string Code() const {
- return shader_source;
- }
-
-private:
- void DefineGlobalMemory();
-
- void DeclareHeader();
- void DeclareVertex();
- void DeclareGeometry();
- void DeclareFragment();
- void DeclareCompute();
- void DeclareInputAttributes();
- void DeclareOutputAttributes();
- void DeclareLocalMemory();
- void DeclareGlobalMemory();
- void DeclareConstantBuffers();
- void DeclareRegisters();
- void DeclareTemporaries();
- void DeclarePredicates();
- void DeclareInternalFlags();
-
- void InitializeVariables();
-
- void DecompileAST();
- void DecompileBranchMode();
-
- void VisitAST(const ASTNode& node);
- std::string VisitExpression(const Expr& node);
-
- void VisitBlock(const NodeBlock& bb);
-
- std::string Visit(const Node& node);
-
- std::tuple<std::string, std::string, std::size_t> BuildCoords(Operation);
- std::string BuildAoffi(Operation);
- std::string GlobalMemoryPointer(const GmemNode& gmem);
- void Exit();
-
- std::string Assign(Operation);
- std::string Select(Operation);
- std::string FClamp(Operation);
- std::string FCastHalf0(Operation);
- std::string FCastHalf1(Operation);
- std::string FSqrt(Operation);
- std::string FSwizzleAdd(Operation);
- std::string HAdd2(Operation);
- std::string HMul2(Operation);
- std::string HFma2(Operation);
- std::string HAbsolute(Operation);
- std::string HNegate(Operation);
- std::string HClamp(Operation);
- std::string HCastFloat(Operation);
- std::string HUnpack(Operation);
- std::string HMergeF32(Operation);
- std::string HMergeH0(Operation);
- std::string HMergeH1(Operation);
- std::string HPack2(Operation);
- std::string LogicalAssign(Operation);
- std::string LogicalPick2(Operation);
- std::string LogicalAnd2(Operation);
- std::string FloatOrdered(Operation);
- std::string FloatUnordered(Operation);
- std::string LogicalAddCarry(Operation);
- std::string Texture(Operation);
- std::string TextureGather(Operation);
- std::string TextureQueryDimensions(Operation);
- std::string TextureQueryLod(Operation);
- std::string TexelFetch(Operation);
- std::string TextureGradient(Operation);
- std::string ImageLoad(Operation);
- std::string ImageStore(Operation);
- std::string Branch(Operation);
- std::string BranchIndirect(Operation);
- std::string PushFlowStack(Operation);
- std::string PopFlowStack(Operation);
- std::string Exit(Operation);
- std::string Discard(Operation);
- std::string EmitVertex(Operation);
- std::string EndPrimitive(Operation);
- std::string InvocationId(Operation);
- std::string YNegate(Operation);
- std::string ThreadId(Operation);
- std::string ShuffleIndexed(Operation);
- std::string Barrier(Operation);
- std::string MemoryBarrierGroup(Operation);
- std::string MemoryBarrierGlobal(Operation);
-
- template <const std::string_view& op>
- std::string Unary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Binary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]));
- return temporary;
- }
-
- template <const std::string_view& op>
- std::string Trinary(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
- Visit(operation[1]), Visit(operation[2]));
- return temporary;
- }
-
- template <const std::string_view& op, bool unordered>
- std::string FloatComparison(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NE.x), -1;", temporary);
-
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- if constexpr (unordered) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), -1;", temporary);
- } else if (op == SNE_F) {
- AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
- AddLine("TRUNC.U.CC RC.x, RC.x;");
- AddLine("MOV.S {} (NE.x), 0;", temporary);
- }
- return temporary;
- }
-
- template <const std::string_view& op, bool is_nan>
- std::string HalfComparison(Operation operation) {
- std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string op_a = Visit(operation[0]);
- const std::string op_b = Visit(operation[1]);
- AddLine("UP2H.F {}, {};", tmp1, op_a);
- AddLine("UP2H.F {}, {};", tmp2, op_b);
- AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
- AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
- AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
- AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
- AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
- if constexpr (is_nan) {
- AddLine("MOVC.F RC.x, {};", op_a);
- AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
- AddLine("MOVC.F RC.x, {};", op_b);
- AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
- }
- return tmp1;
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string AtomicImage(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
-
- AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
- image_id, ImageType(meta.image.type));
- return fmt::format("{}.x", coord);
- }
-
- template <const std::string_view& op, const std::string_view& type>
- std::string Atomic(Operation operation) {
- std::string temporary = AllocTemporary();
- std::string address;
- std::string_view opname;
- bool robust = false;
- if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
- address = GlobalMemoryPointer(*gmem);
- opname = "ATOM";
- robust = true;
- } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
- address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
- opname = "ATOMS";
- } else {
- UNREACHABLE();
- return "{0, 0, 0, 0}";
- }
- if (robust) {
- AddLine("IF NE.x;");
- }
- AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
- if (robust) {
- AddLine("ELSE;");
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("ENDIF;");
- }
- return temporary;
- }
-
- template <char type>
- std::string Negate(Operation operation) {
- std::string temporary = AllocTemporary();
- if constexpr (type == 'F') {
- AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
- } else {
- AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
- }
- return temporary;
- }
-
- template <char type>
- std::string Absolute(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
- return temporary;
- }
-
- template <char type>
- std::string BitfieldInsert(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
- AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
- Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char type>
- std::string BitfieldExtract(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
- AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
- AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
- }
-
- template <char swizzle>
- std::string LocalInvocationId(Operation) {
- return fmt::format("invocation.localid.{}", swizzle);
- }
-
- template <char swizzle>
- std::string WorkGroupId(Operation) {
- return fmt::format("invocation.groupid.{}", swizzle);
- }
-
- template <char c1, char c2>
- std::string ThreadMask(Operation) {
- return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
- }
-
- template <typename... Args>
- void AddExpression(std::string_view text, Args&&... args) {
- shader_source += fmt::format(fmt::runtime(text), std::forward<Args>(args)...);
- }
-
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(text, std::forward<Args>(args)...);
- shader_source += '\n';
- }
-
- std::string AllocLongVectorTemporary() {
- max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
- return fmt::format("L{}", num_long_temporaries++);
- }
-
- std::string AllocLongTemporary() {
- return fmt::format("{}.x", AllocLongVectorTemporary());
- }
-
- std::string AllocVectorTemporary() {
- max_temporaries = std::max(max_temporaries, num_temporaries + 1);
- return fmt::format("T{}", num_temporaries++);
- }
-
- std::string AllocTemporary() {
- return fmt::format("{}.x", AllocVectorTemporary());
- }
-
- void ResetTemporaries() noexcept {
- num_temporaries = 0;
- num_long_temporaries = 0;
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
-
- std::size_t num_temporaries = 0;
- std::size_t max_temporaries = 0;
-
- std::size_t num_long_temporaries = 0;
- std::size_t max_long_temporaries = 0;
-
- std::map<GlobalMemoryBase, u32> global_memory_names;
-
- std::string shader_source;
-
- static constexpr std::string_view ADD_F32 = "ADD.F32";
- static constexpr std::string_view ADD_S = "ADD.S";
- static constexpr std::string_view ADD_U = "ADD.U";
- static constexpr std::string_view MUL_F32 = "MUL.F32";
- static constexpr std::string_view MUL_S = "MUL.S";
- static constexpr std::string_view MUL_U = "MUL.U";
- static constexpr std::string_view DIV_F32 = "DIV.F32";
- static constexpr std::string_view DIV_S = "DIV.S";
- static constexpr std::string_view DIV_U = "DIV.U";
- static constexpr std::string_view MAD_F32 = "MAD.F32";
- static constexpr std::string_view RSQ_F32 = "RSQ.F32";
- static constexpr std::string_view COS_F32 = "COS.F32";
- static constexpr std::string_view SIN_F32 = "SIN.F32";
- static constexpr std::string_view EX2_F32 = "EX2.F32";
- static constexpr std::string_view LG2_F32 = "LG2.F32";
- static constexpr std::string_view SLT_F = "SLT.F32";
- static constexpr std::string_view SLT_S = "SLT.S";
- static constexpr std::string_view SLT_U = "SLT.U";
- static constexpr std::string_view SEQ_F = "SEQ.F32";
- static constexpr std::string_view SEQ_S = "SEQ.S";
- static constexpr std::string_view SEQ_U = "SEQ.U";
- static constexpr std::string_view SLE_F = "SLE.F32";
- static constexpr std::string_view SLE_S = "SLE.S";
- static constexpr std::string_view SLE_U = "SLE.U";
- static constexpr std::string_view SGT_F = "SGT.F32";
- static constexpr std::string_view SGT_S = "SGT.S";
- static constexpr std::string_view SGT_U = "SGT.U";
- static constexpr std::string_view SNE_F = "SNE.F32";
- static constexpr std::string_view SNE_S = "SNE.S";
- static constexpr std::string_view SNE_U = "SNE.U";
- static constexpr std::string_view SGE_F = "SGE.F32";
- static constexpr std::string_view SGE_S = "SGE.S";
- static constexpr std::string_view SGE_U = "SGE.U";
- static constexpr std::string_view AND_S = "AND.S";
- static constexpr std::string_view AND_U = "AND.U";
- static constexpr std::string_view TRUNC_F = "TRUNC.F";
- static constexpr std::string_view TRUNC_S = "TRUNC.S";
- static constexpr std::string_view TRUNC_U = "TRUNC.U";
- static constexpr std::string_view SHL_S = "SHL.S";
- static constexpr std::string_view SHL_U = "SHL.U";
- static constexpr std::string_view SHR_S = "SHR.S";
- static constexpr std::string_view SHR_U = "SHR.U";
- static constexpr std::string_view OR_S = "OR.S";
- static constexpr std::string_view OR_U = "OR.U";
- static constexpr std::string_view XOR_S = "XOR.S";
- static constexpr std::string_view XOR_U = "XOR.U";
- static constexpr std::string_view NOT_S = "NOT.S";
- static constexpr std::string_view NOT_U = "NOT.U";
- static constexpr std::string_view BTC_S = "BTC.S";
- static constexpr std::string_view BTC_U = "BTC.U";
- static constexpr std::string_view BTFM_S = "BTFM.S";
- static constexpr std::string_view BTFM_U = "BTFM.U";
- static constexpr std::string_view ROUND_F = "ROUND.F";
- static constexpr std::string_view CEIL_F = "CEIL.F";
- static constexpr std::string_view FLR_F = "FLR.F";
- static constexpr std::string_view I2F_S = "I2F.S";
- static constexpr std::string_view I2F_U = "I2F.U";
- static constexpr std::string_view MIN_F = "MIN.F";
- static constexpr std::string_view MIN_S = "MIN.S";
- static constexpr std::string_view MIN_U = "MIN.U";
- static constexpr std::string_view MAX_F = "MAX.F";
- static constexpr std::string_view MAX_S = "MAX.S";
- static constexpr std::string_view MAX_U = "MAX.U";
- static constexpr std::string_view MOV_U = "MOV.U";
- static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
- static constexpr std::string_view TGALL_U = "TGALL.U";
- static constexpr std::string_view TGANY_U = "TGANY.U";
- static constexpr std::string_view TGEQ_U = "TGEQ.U";
- static constexpr std::string_view EXCH = "EXCH";
- static constexpr std::string_view ADD = "ADD";
- static constexpr std::string_view MIN = "MIN";
- static constexpr std::string_view MAX = "MAX";
- static constexpr std::string_view AND = "AND";
- static constexpr std::string_view OR = "OR";
- static constexpr std::string_view XOR = "XOR";
- static constexpr std::string_view U32 = "U32";
- static constexpr std::string_view S32 = "S32";
-
- static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
- using DecompilerType = std::string (ARBDecompiler::*)(Operation);
- static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
- &ARBDecompiler::Assign,
-
- &ARBDecompiler::Select,
-
- &ARBDecompiler::Binary<ADD_F32>,
- &ARBDecompiler::Binary<MUL_F32>,
- &ARBDecompiler::Binary<DIV_F32>,
- &ARBDecompiler::Trinary<MAD_F32>,
- &ARBDecompiler::Negate<'F'>,
- &ARBDecompiler::Absolute<'F'>,
- &ARBDecompiler::FClamp,
- &ARBDecompiler::FCastHalf0,
- &ARBDecompiler::FCastHalf1,
- &ARBDecompiler::Binary<MIN_F>,
- &ARBDecompiler::Binary<MAX_F>,
- &ARBDecompiler::Unary<COS_F32>,
- &ARBDecompiler::Unary<SIN_F32>,
- &ARBDecompiler::Unary<EX2_F32>,
- &ARBDecompiler::Unary<LG2_F32>,
- &ARBDecompiler::Unary<RSQ_F32>,
- &ARBDecompiler::FSqrt,
- &ARBDecompiler::Unary<ROUND_F>,
- &ARBDecompiler::Unary<FLR_F>,
- &ARBDecompiler::Unary<CEIL_F>,
- &ARBDecompiler::Unary<TRUNC_F>,
- &ARBDecompiler::Unary<I2F_S>,
- &ARBDecompiler::Unary<I2F_U>,
- &ARBDecompiler::FSwizzleAdd,
-
- &ARBDecompiler::Binary<ADD_S>,
- &ARBDecompiler::Binary<MUL_S>,
- &ARBDecompiler::Binary<DIV_S>,
- &ARBDecompiler::Negate<'S'>,
- &ARBDecompiler::Absolute<'S'>,
- &ARBDecompiler::Binary<MIN_S>,
- &ARBDecompiler::Binary<MAX_S>,
-
- &ARBDecompiler::Unary<TRUNC_S>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_S>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_S>,
- &ARBDecompiler::Binary<AND_S>,
- &ARBDecompiler::Binary<OR_S>,
- &ARBDecompiler::Binary<XOR_S>,
- &ARBDecompiler::Unary<NOT_S>,
- &ARBDecompiler::BitfieldInsert<'S'>,
- &ARBDecompiler::BitfieldExtract<'S'>,
- &ARBDecompiler::Unary<BTC_S>,
- &ARBDecompiler::Unary<BTFM_S>,
-
- &ARBDecompiler::Binary<ADD_U>,
- &ARBDecompiler::Binary<MUL_U>,
- &ARBDecompiler::Binary<DIV_U>,
- &ARBDecompiler::Binary<MIN_U>,
- &ARBDecompiler::Binary<MAX_U>,
- &ARBDecompiler::Unary<TRUNC_U>,
- &ARBDecompiler::Unary<MOV_U>,
- &ARBDecompiler::Binary<SHL_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<SHR_U>,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::BitfieldInsert<'U'>,
- &ARBDecompiler::BitfieldExtract<'U'>,
- &ARBDecompiler::Unary<BTC_U>,
- &ARBDecompiler::Unary<BTFM_U>,
-
- &ARBDecompiler::HAdd2,
- &ARBDecompiler::HMul2,
- &ARBDecompiler::HFma2,
- &ARBDecompiler::HAbsolute,
- &ARBDecompiler::HNegate,
- &ARBDecompiler::HClamp,
- &ARBDecompiler::HCastFloat,
- &ARBDecompiler::HUnpack,
- &ARBDecompiler::HMergeF32,
- &ARBDecompiler::HMergeH0,
- &ARBDecompiler::HMergeH1,
- &ARBDecompiler::HPack2,
-
- &ARBDecompiler::LogicalAssign,
- &ARBDecompiler::Binary<AND_U>,
- &ARBDecompiler::Binary<OR_U>,
- &ARBDecompiler::Binary<XOR_U>,
- &ARBDecompiler::Unary<NOT_U>,
- &ARBDecompiler::LogicalPick2,
- &ARBDecompiler::LogicalAnd2,
-
- &ARBDecompiler::FloatComparison<SLT_F, false>,
- &ARBDecompiler::FloatComparison<SEQ_F, false>,
- &ARBDecompiler::FloatComparison<SLE_F, false>,
- &ARBDecompiler::FloatComparison<SGT_F, false>,
- &ARBDecompiler::FloatComparison<SNE_F, false>,
- &ARBDecompiler::FloatComparison<SGE_F, false>,
- &ARBDecompiler::FloatOrdered,
- &ARBDecompiler::FloatUnordered,
- &ARBDecompiler::FloatComparison<SLT_F, true>,
- &ARBDecompiler::FloatComparison<SEQ_F, true>,
- &ARBDecompiler::FloatComparison<SLE_F, true>,
- &ARBDecompiler::FloatComparison<SGT_F, true>,
- &ARBDecompiler::FloatComparison<SNE_F, true>,
- &ARBDecompiler::FloatComparison<SGE_F, true>,
-
- &ARBDecompiler::Binary<SLT_S>,
- &ARBDecompiler::Binary<SEQ_S>,
- &ARBDecompiler::Binary<SLE_S>,
- &ARBDecompiler::Binary<SGT_S>,
- &ARBDecompiler::Binary<SNE_S>,
- &ARBDecompiler::Binary<SGE_S>,
-
- &ARBDecompiler::Binary<SLT_U>,
- &ARBDecompiler::Binary<SEQ_U>,
- &ARBDecompiler::Binary<SLE_U>,
- &ARBDecompiler::Binary<SGT_U>,
- &ARBDecompiler::Binary<SNE_U>,
- &ARBDecompiler::Binary<SGE_U>,
-
- &ARBDecompiler::LogicalAddCarry,
-
- &ARBDecompiler::HalfComparison<SLT_F, false>,
- &ARBDecompiler::HalfComparison<SEQ_F, false>,
- &ARBDecompiler::HalfComparison<SLE_F, false>,
- &ARBDecompiler::HalfComparison<SGT_F, false>,
- &ARBDecompiler::HalfComparison<SNE_F, false>,
- &ARBDecompiler::HalfComparison<SGE_F, false>,
- &ARBDecompiler::HalfComparison<SLT_F, true>,
- &ARBDecompiler::HalfComparison<SEQ_F, true>,
- &ARBDecompiler::HalfComparison<SLE_F, true>,
- &ARBDecompiler::HalfComparison<SGT_F, true>,
- &ARBDecompiler::HalfComparison<SNE_F, true>,
- &ARBDecompiler::HalfComparison<SGE_F, true>,
-
- &ARBDecompiler::Texture,
- &ARBDecompiler::Texture,
- &ARBDecompiler::TextureGather,
- &ARBDecompiler::TextureQueryDimensions,
- &ARBDecompiler::TextureQueryLod,
- &ARBDecompiler::TexelFetch,
- &ARBDecompiler::TextureGradient,
-
- &ARBDecompiler::ImageLoad,
- &ARBDecompiler::ImageStore,
-
- &ARBDecompiler::AtomicImage<ADD, U32>,
- &ARBDecompiler::AtomicImage<AND, U32>,
- &ARBDecompiler::AtomicImage<OR, U32>,
- &ARBDecompiler::AtomicImage<XOR, U32>,
- &ARBDecompiler::AtomicImage<EXCH, U32>,
-
- &ARBDecompiler::Atomic<EXCH, U32>,
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<EXCH, S32>,
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Atomic<ADD, U32>,
- &ARBDecompiler::Atomic<MIN, U32>,
- &ARBDecompiler::Atomic<MAX, U32>,
- &ARBDecompiler::Atomic<AND, U32>,
- &ARBDecompiler::Atomic<OR, U32>,
- &ARBDecompiler::Atomic<XOR, U32>,
-
- &ARBDecompiler::Atomic<ADD, S32>,
- &ARBDecompiler::Atomic<MIN, S32>,
- &ARBDecompiler::Atomic<MAX, S32>,
- &ARBDecompiler::Atomic<AND, S32>,
- &ARBDecompiler::Atomic<OR, S32>,
- &ARBDecompiler::Atomic<XOR, S32>,
-
- &ARBDecompiler::Branch,
- &ARBDecompiler::BranchIndirect,
- &ARBDecompiler::PushFlowStack,
- &ARBDecompiler::PopFlowStack,
- &ARBDecompiler::Exit,
- &ARBDecompiler::Discard,
-
- &ARBDecompiler::EmitVertex,
- &ARBDecompiler::EndPrimitive,
-
- &ARBDecompiler::InvocationId,
- &ARBDecompiler::YNegate,
- &ARBDecompiler::LocalInvocationId<'x'>,
- &ARBDecompiler::LocalInvocationId<'y'>,
- &ARBDecompiler::LocalInvocationId<'z'>,
- &ARBDecompiler::WorkGroupId<'x'>,
- &ARBDecompiler::WorkGroupId<'y'>,
- &ARBDecompiler::WorkGroupId<'z'>,
-
- &ARBDecompiler::Unary<TGBALLOT_U>,
- &ARBDecompiler::Unary<TGALL_U>,
- &ARBDecompiler::Unary<TGANY_U>,
- &ARBDecompiler::Unary<TGEQ_U>,
-
- &ARBDecompiler::ThreadId,
- &ARBDecompiler::ThreadMask<'e', 'q'>,
- &ARBDecompiler::ThreadMask<'g', 'e'>,
- &ARBDecompiler::ThreadMask<'g', 't'>,
- &ARBDecompiler::ThreadMask<'l', 'e'>,
- &ARBDecompiler::ThreadMask<'l', 't'>,
- &ARBDecompiler::ShuffleIndexed,
-
- &ARBDecompiler::Barrier,
- &ARBDecompiler::MemoryBarrierGroup,
- &ARBDecompiler::MemoryBarrierGlobal,
- };
-};
-
-ARBDecompiler::ARBDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_} {
- DefineGlobalMemory();
-
- AddLine("TEMP RC;");
- AddLine("TEMP FSWZA[4];");
- AddLine("TEMP FSWZB[4];");
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
- AddLine("END");
-
- const std::string code = std::move(shader_source);
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareLocalMemory();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareRegisters();
- DeclareTemporaries();
- DeclarePredicates();
- DeclareInternalFlags();
-
- shader_source += code;
-}
-
-std::string_view HeaderStageName(ShaderType stage) {
- switch (stage) {
- case ShaderType::Vertex:
- return "vp";
- case ShaderType::Geometry:
- return "gp";
- case ShaderType::Fragment:
- return "fp";
- case ShaderType::Compute:
- return "cp";
- default:
- UNREACHABLE();
- return "";
- }
-}
-
-void ARBDecompiler::DefineGlobalMemory() {
- u32 binding = 0;
- for (const auto& pair : ir.GetGlobalMemory()) {
- const GlobalMemoryBase base = pair.first;
- global_memory_names.emplace(base, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareHeader() {
- AddLine("!!NV{}5.0", HeaderStageName(stage));
- // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
- AddLine("OPTION NV_internal;");
- AddLine("OPTION NV_gpu_program_fp64;");
- AddLine("OPTION NV_shader_thread_group;");
- if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
- AddLine("OPTION NV_shader_thread_shuffle;");
- }
- if (stage == ShaderType::Vertex) {
- if (device.HasNvViewportArray2()) {
- AddLine("OPTION NV_viewport_array2;");
- }
- }
- if (stage == ShaderType::Fragment) {
- AddLine("OPTION ARB_draw_buffers;");
- }
- if (device.HasImageLoadFormatted()) {
- AddLine("OPTION EXT_shader_image_load_formatted;");
- }
-}
-
-void ARBDecompiler::DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
- AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
-}
-
-void ARBDecompiler::DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
- const auto& info = registry.GetGraphicsInfo();
- const auto& header = ir.GetHeader();
- AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
- AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
- AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
- AddLine("ATTRIB vertex_position = vertex.position;");
-}
-
-void ARBDecompiler::DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- AddLine("OUTPUT result_color7 = result.color[7];");
- AddLine("OUTPUT result_color6 = result.color[6];");
- AddLine("OUTPUT result_color5 = result.color[5];");
- AddLine("OUTPUT result_color4 = result.color[4];");
- AddLine("OUTPUT result_color3 = result.color[3];");
- AddLine("OUTPUT result_color2 = result.color[2];");
- AddLine("OUTPUT result_color1 = result.color[1];");
- AddLine("OUTPUT result_color0 = result.color;");
-}
-
-void ARBDecompiler::DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const ComputeInfo& info = registry.GetComputeInfo();
- AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
- info.workgroup_size[2]);
- if (info.shared_memory_size_in_words == 0) {
- return;
- }
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- u32 size_in_bytes = info.shared_memory_size_in_words * 4;
- if (size_in_bytes > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size_in_bytes, limit);
- size_in_bytes = limit;
- }
-
- AddLine("SHARED_MEMORY {};", size_in_bytes);
- AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
-}
-
-void ARBDecompiler::DeclareInputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- const std::string_view stage_name = StageInputName(stage);
- for (const auto attribute : ir.GetInputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
-
- std::string_view suffix;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
- if (input_mode == PixelImap::Unused) {
- return;
- }
- suffix = GetInputFlags(input_mode);
- }
- AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
- index);
- }
-}
-
-void ARBDecompiler::DeclareOutputAttributes() {
- if (stage == ShaderType::Compute) {
- return;
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
- }
-}
-
-void ARBDecompiler::DeclareLocalMemory() {
- u64 size = 0;
- if (stage == ShaderType::Compute) {
- size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- size = ir.GetHeader().GetLocalMemorySize();
- }
- if (size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(size, 4) / 4;
- AddLine("TEMP lmem[{}];", element_count);
-}
-
-void ARBDecompiler::DeclareGlobalMemory() {
- const size_t num_entries = ir.GetGlobalMemory().size();
- if (num_entries > 0) {
- AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
- }
-}
-
-void ARBDecompiler::DeclareConstantBuffers() {
- u32 binding = 0;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
- ++binding;
- }
-}
-
-void ARBDecompiler::DeclareRegisters() {
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("TEMP R{};", gpr);
- }
-}
-
-void ARBDecompiler::DeclareTemporaries() {
- for (std::size_t i = 0; i < max_temporaries; ++i) {
- AddLine("TEMP T{};", i);
- }
- for (std::size_t i = 0; i < max_long_temporaries; ++i) {
- AddLine("LONG TEMP L{};", i);
- }
-}
-
-void ARBDecompiler::DeclarePredicates() {
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("TEMP P{};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DeclareInternalFlags() {
- for (const char* name : INTERNAL_FLAG_NAMES) {
- AddLine("TEMP {};", name);
- }
-}
-
-void ARBDecompiler::InitializeVariables() {
- AddLine("MOV.F32 FSWZA[0], -1;");
- AddLine("MOV.F32 FSWZA[1], 1;");
- AddLine("MOV.F32 FSWZA[2], -1;");
- AddLine("MOV.F32 FSWZA[3], 0;");
- AddLine("MOV.F32 FSWZB[0], -1;");
- AddLine("MOV.F32 FSWZB[1], -1;");
- AddLine("MOV.F32 FSWZB[2], 1;");
- AddLine("MOV.F32 FSWZB[3], -1;");
-
- if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
- AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
- }
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (!IsGenericAttribute(attribute)) {
- continue;
- }
- const u32 index = GetGenericAttributeIndex(attribute);
- AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
- }
- for (const u32 gpr : ir.GetRegisters()) {
- AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
- }
- for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
- AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
- }
-}
-
-void ARBDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("TEMP F{};", i);
- }
- for (u32 i = 0; i < num_flow_variables; ++i) {
- AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
- }
-
- InitializeVariables();
-
- VisitAST(ir.GetASTProgram());
-}
-
-void ARBDecompiler::DecompileBranchMode() {
- static constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
- AddLine("TEMP SSY_TOP;");
- AddLine("TEMP PBK_TOP;");
- }
-
- AddLine("TEMP PC;");
-
- if (!ir.IsFlowStackDisabled()) {
- AddLine("MOV.U SSY_TOP.x, 0;");
- AddLine("MOV.U PBK_TOP.x, 0;");
- }
-
- InitializeVariables();
-
- const auto basic_block_end = ir.GetBasicBlocks().end();
- auto basic_block_it = ir.GetBasicBlocks().begin();
- const u32 first_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", first_address);
-
- AddLine("REP;");
-
- std::size_t num_blocks = 0;
- while (basic_block_it != basic_block_end) {
- const auto& [address, bb] = *basic_block_it;
- ++num_blocks;
-
- AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
- AddLine("IF NE.x;");
-
- VisitBlock(bb);
-
- ++basic_block_it;
-
- if (basic_block_it != basic_block_end) {
- const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
- if (!op || op->GetCode() != OperationCode::Branch) {
- const u32 next_address = basic_block_it->first;
- AddLine("MOV.U PC.x, {};", next_address);
- AddLine("CONT;");
- }
- }
-
- AddLine("ELSE;");
- }
- AddLine("RET;");
- while (num_blocks--) {
- AddLine("ENDIF;");
- }
-
- AddLine("ENDREP;");
-}
-
-void ARBDecompiler::VisitAST(const ASTNode& node) {
- if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
- for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto if_then = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(if_then->condition);
- ResetTemporaries();
-
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("IF NE.x;");
- for (ASTNode current = if_then->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("ENDIF;");
- } else if (const auto if_else = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
- AddLine("ELSE;");
- for (ASTNode current = if_else->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- } else if (const auto decoded = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
- VisitBlock(decoded->nodes);
- } else if (const auto var_set = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
- AddLine("MOV.U F{}, {};", var_set->index, VisitExpression(var_set->condition));
- ResetTemporaries();
- } else if (const auto do_while = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
- const std::string condition = VisitExpression(do_while->condition);
- ResetTemporaries();
- AddLine("REP;");
- for (ASTNode current = do_while->nodes.GetFirst(); current; current = current->GetNext()) {
- VisitAST(current);
- }
- AddLine("MOVC.U RC.x, {};", condition);
- AddLine("BRK (NE.x);");
- AddLine("ENDREP;");
- } else if (const auto ast_return = std::get_if<ASTReturn>(&*node->GetInnerData())) {
- const bool is_true = ExprIsTrue(ast_return->condition);
- if (!is_true) {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_return->condition));
- AddLine("IF NE.x;");
- ResetTemporaries();
- }
- if (ast_return->kills) {
- AddLine("KIL TR;");
- } else {
- Exit();
- }
- if (!is_true) {
- AddLine("ENDIF;");
- }
- } else if (const auto ast_break = std::get_if<ASTBreak>(&*node->GetInnerData())) {
- if (ExprIsTrue(ast_break->condition)) {
- AddLine("BRK;");
- } else {
- AddLine("MOVC.U RC.x, {};", VisitExpression(ast_break->condition));
- AddLine("BRK (NE.x);");
- ResetTemporaries();
- }
- } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
- // Nothing to do
- } else {
- UNREACHABLE();
- }
-}
-
-std::string ARBDecompiler::VisitExpression(const Expr& node) {
- if (const auto expr = std::get_if<ExprAnd>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprOr>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
- VisitExpression(expr->operand2));
- return result;
- }
- if (const auto expr = std::get_if<ExprNot>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
- return result;
- }
- if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
- return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
- }
- if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
- return Visit(ir.GetConditionCode(expr->cc));
- }
- if (const auto expr = std::get_if<ExprVar>(&*node)) {
- return fmt::format("F{}.x", expr->var_index);
- }
- if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
- return expr->value ? "0xffffffff" : "0";
- }
- if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
- std::string result = AllocTemporary();
- AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
- return result;
- }
- UNREACHABLE();
- return "0";
-}
-
-void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node);
- }
-}
-
-std::string ARBDecompiler::Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- const std::size_t index = static_cast<std::size_t>(operation->GetCode());
- if (index >= OPERATION_DECOMPILERS.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", index);
- return {};
- }
- const auto decompiler = OPERATION_DECOMPILERS[index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return "{0, 0, 0, 0}.x";
- }
- return fmt::format("R{}.x", index);
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- return fmt::format("CV{}.x", cv->GetIndex());
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
- return temporary;
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- std::string temporary = AllocTemporary();
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- AddLine("MOV.S {}, -1;", temporary);
- break;
- case Tegra::Shader::Pred::NeverExecute:
- AddLine("MOV.S {}, 0;", temporary);
- break;
- default:
- AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
- break;
- }
- if (predicate->IsNegated()) {
- AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
- }
- return temporary;
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- if (abuf->IsPhysicalBuffer()) {
- UNIMPLEMENTED_MSG("Physical buffers are not implemented");
- return "{0, 0, 0, 0}.x";
- }
-
- const Attribute::Index index = abuf->GetIndex();
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (index) {
- case Attribute::Index::Position: {
- if (stage == ShaderType::Geometry) {
- return fmt::format("{}_position[{}].{}", StageInputName(stage),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
- }
- }
- case Attribute::Index::TessCoordInstanceIDVertexID:
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- return "vertex.instance";
- case 3:
- return "vertex.id";
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- break;
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return "fragment.pointcoord.x";
- case 1:
- return "fragment.pointcoord.y";
- }
- UNIMPLEMENTED();
- break;
- case Attribute::Index::FrontFacing: {
- ASSERT(stage == ShaderType::Fragment);
- ASSERT(element == 3);
- const std::string temporary = AllocVectorTemporary();
- AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
- AddLine("MOV.U.CC RC.x, -RC;");
- AddLine("MOV.S {}.x, 0;", temporary);
- AddLine("MOV.S {}.x (NE.x), -1;", temporary);
- return fmt::format("{}.x", temporary);
- }
- default:
- if (IsGenericAttribute(index)) {
- if (stage == ShaderType::Geometry) {
- return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
- Visit(abuf->GetBuffer()), swizzle);
- } else {
- return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
- GetGenericAttributeIndex(index), swizzle);
- }
- }
- UNIMPLEMENTED_MSG("Unimplemented input attribute={}", index);
- break;
- }
- return "{0, 0, 0, 0}.x";
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- std::string offset_string;
- const auto& offset = cbuf->GetOffset();
- if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
- offset_string = std::to_string(imm->GetValue());
- } else {
- offset_string = Visit(offset);
- }
- std::string temporary = AllocTemporary();
- AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
- return temporary;
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- std::string temporary = AllocTemporary();
- AddLine("MOV {}, 0;", temporary);
- AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
- return temporary;
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- std::string temporary = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", temporary, temporary);
- AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
- return temporary;
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- std::string temporary = Visit(smem->GetAddress());
- AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
- return temporary;
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index));
- }
- AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
- AddLine("IF NE.x;");
- VisitBlock(conditional->GetCode());
- AddLine("ENDIF;");
- return {};
- }
-
- if ([[maybe_unused]] const auto cmt = std::get_if<CommentNode>(&*node)) {
- // Uncommenting this will generate invalid code. GLASM lacks comments.
- // AddLine("// {}", cmt->GetText());
- return {};
- }
-
- UNIMPLEMENTED();
- return {};
-}
-
-std::tuple<std::string, std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(meta.sampler.is_indexed);
-
- const bool is_extended = meta.sampler.is_shadow && meta.sampler.is_array &&
- meta.sampler.type == Tegra::Shader::TextureType::TextureCube;
- const std::size_t count = operation.GetOperandsCount();
- std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- if (meta.sampler.is_array) {
- AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i), Visit(meta.array));
- ++i;
- }
- if (meta.sampler.is_shadow) {
- std::string compare = Visit(meta.depth_compare);
- if (is_extended) {
- ASSERT(i == 4);
- std::string extra_coord = AllocVectorTemporary();
- AddLine("MOV.F {}.x, {};", extra_coord, compare);
- return {fmt::format("{}, {}", temporary, extra_coord), extra_coord, 0};
- }
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), compare);
- ++i;
- }
- return {temporary, temporary, i};
-}
-
-std::string ARBDecompiler::BuildAoffi(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- if (meta.aoffi.empty()) {
- return {};
- }
- const std::string temporary = AllocVectorTemporary();
- std::size_t i = 0;
- for (auto& node : meta.aoffi) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
- }
- return fmt::format(", offset({})", temporary);
-}
-
-std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
- // Read a bindless SSBO, return its address and set CC accordingly
- // address = c[binding].xy
- // length = c[binding].z
- const u32 binding = global_memory_names.at(gmem.GetDescriptor());
-
- const std::string pointer = AllocLongVectorTemporary();
- std::string temporary = AllocTemporary();
-
- AddLine("PK64.U {}, c[{}];", pointer, binding);
- AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
- Visit(gmem.GetBaseAddress()));
- AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
- AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
- // Compare offset to length and set CC
- AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
- return fmt::format("{}.x", pointer);
-}
-
-void ARBDecompiler::Exit() {
- if (stage != ShaderType::Fragment) {
- AddLine("RET;");
- return;
- }
-
- const auto safe_get_register = [this](u32 reg) -> std::string {
- if (ir.GetRegisters().contains(reg)) {
- return fmt::format("R{}.x", reg);
- }
- return "{0, 0, 0, 0}.x";
- };
-
- const auto& header = ir.GetHeader();
- u32 current_reg = 0;
- for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
- for (u32 component = 0; component < 4; ++component) {
- if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
- continue;
- }
- AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
- safe_get_register(current_reg));
- ++current_reg;
- }
- }
- if (header.ps.omap.depth) {
- AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
- }
-
- AddLine("RET;");
-}
-
-std::string ARBDecompiler::Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string dest_name;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op
- return {};
- }
- dest_name = fmt::format("R{}.x", gpr->GetIndex());
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- const u32 element = abuf->GetElement();
- const char swizzle = Swizzle(element);
- switch (const Attribute::Index index = abuf->GetIndex()) {
- case Attribute::Index::Position:
- dest_name = fmt::format("result.position.{}", swizzle);
- break;
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return {};
- case 1:
- case 2:
- if (!device.HasNvViewportArray2()) {
- LOG_ERROR(
- Render_OpenGL,
- "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
- return {};
- }
- dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
- break;
- case 3:
- dest_name = "result.pointsize.x";
- break;
- }
- break;
- case Attribute::Index::ClipDistances0123:
- dest_name = fmt::format("result.clip[{}].x", element);
- break;
- case Attribute::Index::ClipDistances4567:
- dest_name = fmt::format("result.clip[{}].x", element + 4);
- break;
- default:
- if (!IsGenericAttribute(index)) {
- UNREACHABLE();
- return {};
- }
- dest_name =
- fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
- break;
- }
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- const std::string address = Visit(lmem->GetAddress());
- AddLine("SHR.U {}, {}, 2;", address, address);
- dest_name = fmt::format("lmem[{}].x", address);
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
- ResetTemporaries();
- return {};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- AddLine("IF NE.x;");
- AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
- AddLine("ENDIF;");
- ResetTemporaries();
- return {};
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", dest_name, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::Select(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
- Visit(operation[2]));
- return temporary;
-}
-
-std::string ARBDecompiler::FClamp(Operation operation) {
- // 1.0f in hex, replace with std::bit_cast on C++20
- static constexpr u32 POSITIVE_ONE = 0x3f800000;
-
- std::string temporary = AllocTemporary();
- const Node& value = operation[0];
- const Node& low = operation[1];
- const Node& high = operation[2];
- const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
- const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
- if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
- AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
- } else {
- AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
- AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
- }
- return temporary;
-}
-
-std::string ARBDecompiler::FCastHalf0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FCastHalf1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
- AddLine("MOV {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::FSqrt(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
- AddLine("RCP.F32 {}, {};", temporary, temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
- return fmt::format("{}.x", temporary);
- }
-
- AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
- AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
- AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
- AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
- AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
- AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
- AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HAdd2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HMul2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HFma2(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- const std::string tmp3 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
- AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
- AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HAbsolute(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HNegate(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
- AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
- AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
- AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HClamp(Operation operation) {
- const std::string tmp1 = AllocVectorTemporary();
- const std::string tmp2 = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
- AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
- AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
- AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
- return fmt::format("{}.x", tmp1);
-}
-
-std::string ARBDecompiler::HCastFloat(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
- AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HUnpack(Operation operation) {
- std::string operand = Visit(operation[0]);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H0_H0: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- case Tegra::Shader::HalfType::H1_H1: {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, operand);
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
- }
- }
- UNREACHABLE();
- return "{0, 0, 0, 0}.x";
-}
-
-std::string ARBDecompiler::HMergeF32(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH0(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HMergeH1(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
- AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
- AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::HPack2(Operation operation) {
- const std::string temporary = AllocVectorTemporary();
- AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
- AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
- AddLine("PK2H.F {}.x, {};", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const Tegra::Shader::Pred index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = fmt::format("P{}.x", static_cast<u64>(index));
- } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
- const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
- target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
- } else {
- UNREACHABLE();
- ResetTemporaries();
- return {};
- }
-
- AddLine("MOV.U {}, {};", target, Visit(src));
- ResetTemporaries();
- return {};
-}
-
-std::string ARBDecompiler::LogicalPick2(Operation operation) {
- std::string temporary = AllocTemporary();
- const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
- AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAnd2(Operation operation) {
- std::string temporary = AllocTemporary();
- const std::string op = Visit(operation[0]);
- AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatOrdered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("MOV.S {} (NAN.x), 0;", temporary);
- AddLine("MOV.S {} (NAN.y), 0;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::FloatUnordered(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
- AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("MOV.S {} (NAN.x), -1;", temporary);
- AddLine("MOV.S {} (NAN.y), -1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
- std::string temporary = AllocTemporary();
- AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
- AddLine("MOV.S {}, 0;", temporary);
- AddLine("IF CF.x;");
- AddLine("MOV.S {}, -1;", temporary);
- AddLine("ENDIF;");
- return temporary;
-}
-
-std::string ARBDecompiler::Texture(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string_view opcode = "TEX";
- std::string extra;
- if (meta.bias) {
- ASSERT(!meta.lod);
- opcode = "TXB";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
- } else {
- const std::string bias = AllocTemporary();
- AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
- extra = fmt::format(" {},", bias);
- }
- }
- if (meta.lod) {
- ASSERT(!meta.bias);
- opcode = "TXL";
-
- if (swizzle < 4) {
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- } else {
- const std::string lod = AllocTemporary();
- AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
- extra = fmt::format(" {},", lod);
- }
- }
-
- AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, coords, extra, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- std::string comp;
- if (!meta.sampler.is_shadow) {
- const auto& immediate = std::get<ImmediateNode>(*meta.component);
- comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
- }
-
- AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, coords, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
- const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
- AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureQueryLod(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const std::string temporary = AllocVectorTemporary();
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
-
- ASSERT(!meta.sampler.is_array);
-
- const std::size_t count = operation.GetOperandsCount();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
- AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
- AddLine("TRUNC.S {}, {};", temporary, temporary);
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TexelFetch(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const auto [coords, temporary, swizzle] = BuildCoords(operation);
-
- if (!meta.sampler.is_buffer) {
- ASSERT(swizzle < 4);
- AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
- }
- AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, coords, sampler_id, TextureType(meta),
- BuildAoffi(operation));
- AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
- const std::string ddx = AllocVectorTemporary();
- const std::string ddy = AllocVectorTemporary();
- const std::string coord = std::get<1>(BuildCoords(operation));
-
- const std::size_t num_components = meta.derivates.size() / 2;
- for (std::size_t index = 0; index < num_components; ++index) {
- const char swizzle = Swizzle(index);
- AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
- AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
- }
-
- const std::string_view result = coord;
- AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
- TextureType(meta), BuildAoffi(operation));
- AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
- return fmt::format("{}.x", result);
-}
-
-std::string ARBDecompiler::ImageLoad(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t count = operation.GetOperandsCount();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string temporary = AllocVectorTemporary();
- for (std::size_t i = 0; i < count; ++i) {
- AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
- }
- AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
- AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::ImageStore(Operation operation) {
- const auto& meta = std::get<MetaImage>(operation.GetMeta());
- const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
- const std::size_t num_coords = operation.GetOperandsCount();
- const std::size_t num_values = meta.values.size();
- const std::string_view type = ImageType(meta.image.type);
-
- const std::string coord = AllocVectorTemporary();
- const std::string value = AllocVectorTemporary();
- for (std::size_t i = 0; i < num_coords; ++i) {
- AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
- }
- for (std::size_t i = 0; i < num_values; ++i) {
- AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
- }
- AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
- return {};
-}
-
-std::string ARBDecompiler::Branch(Operation operation) {
- const auto target = std::get<ImmediateNode>(*operation[0]);
- AddLine("MOV.U PC.x, {};", target.GetValue());
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::BranchIndirect(Operation operation) {
- AddLine("MOV.U PC.x, {};", Visit(operation[0]));
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
- const std::string_view stack_name = StackName(stack);
- AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
- AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- return {};
-}
-
-std::string ARBDecompiler::PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const std::string_view stack_name = StackName(stack);
- AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
- AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
- AddLine("CONT;");
- return {};
-}
-
-std::string ARBDecompiler::Exit(Operation) {
- Exit();
- return {};
-}
-
-std::string ARBDecompiler::Discard(Operation) {
- AddLine("KIL TR;");
- return {};
-}
-
-std::string ARBDecompiler::EmitVertex(Operation) {
- AddLine("EMIT;");
- return {};
-}
-
-std::string ARBDecompiler::EndPrimitive(Operation) {
- AddLine("ENDPRIM;");
- return {};
-}
-
-std::string ARBDecompiler::InvocationId(Operation) {
- return "primitive.invocation";
-}
-
-std::string ARBDecompiler::YNegate(Operation) {
- LOG_WARNING(Render_OpenGL, "(STUBBED)");
- std::string temporary = AllocTemporary();
- AddLine("MOV.F {}, 1;", temporary);
- return temporary;
-}
-
-std::string ARBDecompiler::ThreadId(Operation) {
- return fmt::format("{}.threadid", StageInputName(stage));
-}
-
-std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "NV_shader_thread_shuffle is missing. Kepler or better is required.");
- return Visit(operation[0]);
- }
- const std::string temporary = AllocVectorTemporary();
- AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
- Visit(operation[1]));
- AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
- return fmt::format("{}.x", temporary);
-}
-
-std::string ARBDecompiler::Barrier(Operation) {
- AddLine("BAR;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
- AddLine("MEMBAR.CTA;");
- return {};
-}
-
-std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
- AddLine("MEMBAR;");
- return {};
-}
-
-} // Anonymous namespace
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier) {
- return ARBDecompiler(device, ir, registry, stage, identifier).Code();
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
deleted file mode 100644
index 6afc87220..000000000
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2020 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <string>
-#include <string_view>
-
-#include "common/common_types.h"
-
-namespace Tegra::Engines {
-enum class ShaderType : u32;
-}
-
-namespace VideoCommon::Shader {
-class ShaderIR;
-class Registry;
-} // namespace VideoCommon::Shader
-
-namespace OpenGL {
-
-class Device;
-
-std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier);
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index c225d1fc9..07a995f7d 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,14 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <span>
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
+using VideoCore::Surface::PixelFormat;
+
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
@@ -21,6 +25,25 @@ constexpr std::array PROGRAM_LUT{
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
+
+[[nodiscard]] GLenum GetTextureBufferFormat(GLenum gl_format) {
+ switch (gl_format) {
+ case GL_RGBA8_SNORM:
+ return GL_RGBA8;
+ case GL_R8_SNORM:
+ return GL_R8;
+ case GL_RGBA16_SNORM:
+ return GL_RGBA16;
+ case GL_R16_SNORM:
+ return GL_R16;
+ case GL_RG16_SNORM:
+ return GL_RG16;
+ case GL_RG8_SNORM:
+ return GL_RG8;
+ default:
+ return gl_format;
+ }
+}
} // Anonymous namespace
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
@@ -62,6 +85,30 @@ void Buffer::MakeResident(GLenum access) noexcept {
glMakeNamedBufferResidentNV(buffer.handle, access);
}
+GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
+ const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+ return offset == view.offset && size == view.size && format == view.format;
+ })};
+ if (it != views.end()) {
+ return it->texture.handle;
+ }
+ OGLTexture texture;
+ texture.Create(GL_TEXTURE_BUFFER);
+ const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
+ const GLenum texture_format{GetTextureBufferFormat(gl_format)};
+ if (texture_format != gl_format) {
+ LOG_WARNING(Render_OpenGL, "Emulating SNORM texture buffer with UNORM.");
+ }
+ glTextureBufferRange(texture.handle, texture_format, buffer.handle, offset, size);
+ views.push_back({
+ .offset = offset,
+ .size = size,
+ .format = format,
+ .texture = std::move(texture),
+ });
+ return views.back().texture.handle;
+}
+
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
@@ -98,6 +145,12 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
}
}
+void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {
+ glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast<GLintptr>(offset),
+ static_cast<GLsizeiptr>(size / sizeof(u32)), GL_RED, GL_UNSIGNED_INT,
+ &value);
+}
+
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
if (has_unified_vertex_buffers) {
buffer.MakeResident(GL_READ_ONLY);
@@ -138,7 +191,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -165,7 +218,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
u32 offset, u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ const GLuint base_binding = graphics_base_storage_bindings[stage];
+ const GLuint binding = base_binding + binding_index;
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -174,17 +232,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else {
- const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
- const GLuint binding = base_binding + binding_index;
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
u32 size, bool is_written) {
- if (use_assembly_shaders) {
+ if (use_storage_buffers) {
+ if (size != 0) {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+ static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+ } else {
+ glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+ }
+ } else {
const BindlessSSBO ssbo{
.address = buffer.HostGpuAddr() + offset,
.length = static_cast<GLsizei>(size),
@@ -193,11 +253,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
- } else if (size == 0) {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
- } else {
- glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
- static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
}
@@ -207,4 +262,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
+void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ PixelFormat format) {
+ *texture_handles++ = buffer.View(offset, size, format);
+}
+
+void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
+ *image_handles++ = buffer.View(offset, size, format);
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index d8b20a9af..060d36427 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -32,6 +32,8 @@ public:
void MakeResident(GLenum access) noexcept;
+ [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
+
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
@@ -41,9 +43,17 @@ public:
}
private:
+ struct BufferView {
+ u32 offset;
+ u32 size;
+ VideoCore::Surface::PixelFormat format;
+ OGLTexture texture;
+ };
+
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
+ std::vector<BufferView> views;
};
class BufferCacheRuntime {
@@ -57,6 +67,8 @@ public:
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies);
+ void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
+
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
@@ -73,17 +85,21 @@ public:
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
+ void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
+ void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
+ VideoCore::Surface::PixelFormat format);
+
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
+ const GLuint handle = fast_uniforms[stage][binding_index].handle;
+ const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
if (use_assembly_shaders) {
- const GLuint handle = fast_uniforms[stage][binding_index].handle;
- const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
- glBindBufferRange(GL_UNIFORM_BUFFER, binding,
- fast_uniforms[stage][binding_index].handle, 0,
- static_cast<GLsizeiptr>(size));
+ glBindBufferRange(GL_UNIFORM_BUFFER, binding, handle, 0, gl_size);
}
}
@@ -101,7 +117,7 @@ public:
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
- const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
+ const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -116,6 +132,27 @@ public:
return has_fast_buffer_sub_data;
}
+ [[nodiscard]] bool SupportsNonZeroUniformOffset() const noexcept {
+ return !use_assembly_shaders;
+ }
+
+ void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_uniform_bindings = bindings;
+ }
+
+ void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
+ graphics_base_storage_bindings = bindings;
+ }
+
+ void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
+ texture_handles = texture_handles_;
+ image_handles = image_handles_;
+ }
+
+ void SetEnableStorageBuffers(bool use_storage_buffers_) {
+ use_storage_buffers = use_storage_buffers_;
+ }
+
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -129,8 +166,15 @@ private:
bool use_assembly_shaders = false;
bool has_unified_vertex_buffers = false;
+ bool use_storage_buffers = false;
+
u32 max_attributes = 0;
+ std::array<GLuint, 5> graphics_base_uniform_bindings{};
+ std::array<GLuint, 5> graphics_base_storage_bindings{};
+ GLuint* texture_handles = nullptr;
+ GLuint* image_handles = nullptr;
+
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -154,6 +198,7 @@ struct BufferCacheParams {
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
+ static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
new file mode 100644
index 000000000..aa1cc592f
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -0,0 +1,209 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/cityhash.h"
+#include "common/settings.h" // for enum class Settings::ShaderBackend
+#include "video_core/renderer_opengl/gl_compute_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+
+using Shader::ImageBufferDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 16;
+
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+ u32 num{};
+ for (const auto& desc : range) {
+ num += desc.count;
+ }
+ return num;
+}
+
+size_t ComputePipelineKey::Hash() const noexcept {
+ return static_cast<size_t>(
+ Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
+}
+
+bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, sizeof *this) == 0;
+}
+
+ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, const Shader::Info& info_,
+ std::string code, std::vector<u32> code_v)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
+ kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ source_program = CreateProgram(code, GL_COMPUTE_SHADER);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
+ break;
+ }
+ std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
+ uniform_buffer_sizes.begin());
+
+ num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
+ num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+
+ const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
+ ASSERT(num_textures <= MAX_TEXTURES);
+
+ const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
+ ASSERT(num_images <= MAX_IMAGES);
+
+ const bool is_glasm{assembly_program.handle != 0};
+ const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
+ use_storage_buffers =
+ !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory = !use_storage_buffers &&
+ std::ranges::any_of(info.storage_buffers_descriptors,
+ [](const auto& desc) { return desc.is_written; });
+}
+
+void ComputePipeline::Configure() {
+ buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
+ buffer_cache.UnbindComputeStorageBuffers();
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
+ desc.is_written);
+ ++ssbo_index;
+ }
+ texture_cache.SynchronizeComputeDescriptors();
+
+ std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ GLsizei sampler_binding{};
+ GLsizei texture_binding{};
+ GLsizei image_binding{};
+
+ const auto& qmd{kepler_compute.launch_description};
+ const auto& cbufs{qmd.const_buffer_config};
+ const bool via_header_index{qmd.linked_tsc != 0};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
+ if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
+ const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
+ secondary_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ return TexturePair(lhs_raw | rhs_raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ }
+ }};
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ std::ranges::for_each(info.image_buffer_descriptors, add_image);
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices.push_back(handle.first);
+
+ Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ std::ranges::for_each(info.image_descriptors, add_image);
+
+ const std::span indices_span(image_view_indices.data(), image_view_indices.size());
+ texture_cache.FillComputeImageViews(indices_span, image_view_ids);
+
+ if (assembly_program.handle != 0) {
+ program_manager.BindComputeAssemblyProgram(assembly_program.handle);
+ } else {
+ program_manager.BindComputeProgram(source_program.handle);
+ }
+ buffer_cache.UnbindComputeTextureBuffers();
+ size_t texbuf_index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
+ buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++texbuf_index;
+ }
+ }};
+ std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
+ std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
+
+ buffer_cache.UpdateComputeBuffers();
+
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
+ buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
+ buffer_cache.BindHostComputeBuffers();
+
+ const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
+ texture_binding += num_texture_buffers;
+ image_binding += num_image_buffers;
+
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ }
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
new file mode 100644
index 000000000..50c676365
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -0,0 +1,93 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <type_traits>
+#include <utility>
+
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace Tegra::Engines {
+class KeplerCompute;
+}
+
+namespace Shader {
+struct Info;
+}
+
+namespace OpenGL {
+
+class Device;
+class ProgramManager;
+
+struct ComputePipelineKey {
+ u64 unique_hash;
+ u32 shared_memory_size;
+ std::array<u32, 3> workgroup_size;
+
+ size_t Hash() const noexcept;
+
+ bool operator==(const ComputePipelineKey&) const noexcept;
+
+ bool operator!=(const ComputePipelineKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+};
+static_assert(std::has_unique_object_representations_v<ComputePipelineKey>);
+static_assert(std::is_trivially_copyable_v<ComputePipelineKey>);
+static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
+
+class ComputePipeline {
+public:
+ explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ ProgramManager& program_manager_, const Shader::Info& info_,
+ std::string code, std::vector<u32> code_v);
+
+ void Configure();
+
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
+private:
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ ProgramManager& program_manager;
+
+ Shader::Info info;
+ OGLProgram source_program;
+ OGLAssemblyProgram assembly_program;
+ VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
+
+ u32 num_texture_buffers{};
+ u32 num_image_buffers{};
+
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::ComputePipelineKey> {
+ size_t operator()(const OpenGL::ComputePipelineKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3b00614e7..9692b8e94 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -17,39 +17,17 @@
#include "common/logging/log.h"
#include "common/scope_exit.h"
#include "common/settings.h"
+#include "shader_recompiler/stage.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
namespace {
-// One uniform block is reserved for emulation purposes
-constexpr u32 ReservedUniformBlocks = 1;
-
-constexpr u32 NumStages = 5;
-
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
-constexpr std::array LIMIT_SSBOS = {
- GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
- GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
- GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
-};
-constexpr std::array LIMIT_SAMPLERS = {
- GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
- GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
- GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
- GL_MAX_TEXTURE_IMAGE_UNITS,
- GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
-};
-constexpr std::array LIMIT_IMAGES = {
- GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
- GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
- GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
-};
template <typename T>
T GetInteger(GLenum pname) {
@@ -82,81 +60,18 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
return std::ranges::find(extensions, extension) != extensions.end();
}
-u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
- ASSERT(num >= amount);
- if (limit) {
- amount = std::min(amount, GetInteger<u32>(*limit));
- }
- num -= amount;
- return std::exchange(base, base + amount);
-}
-
-std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
- std::array<u32, Tegra::Engines::MaxShaderTypes> max;
- std::ranges::transform(LIMIT_UBOS, max.begin(),
- [](GLenum pname) { return GetInteger<u32>(pname); });
+std::array<u32, Shader::MaxStageTypes> BuildMaxUniformBuffers() noexcept {
+ std::array<u32, Shader::MaxStageTypes> max;
+ std::ranges::transform(LIMIT_UBOS, max.begin(), &GetInteger<u32>);
return max;
}
-std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
- std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
-
- static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
- const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
- const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
- const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
-
- u32 num_ubos = total_ubos - ReservedUniformBlocks;
- u32 num_ssbos = total_ssbos;
- u32 num_samplers = total_samplers;
-
- u32 base_ubo = ReservedUniformBlocks;
- u32 base_ssbo = 0;
- u32 base_samplers = 0;
-
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- bindings[stage] = {
- Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
- Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
- Extract(base_samplers, num_samplers, total_samplers / NumStages,
- LIMIT_SAMPLERS[stage])};
- }
-
- u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
- u32 base_images = 0;
-
- // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
- // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
- // fragment stage, and at least 1 for the rest of the stages.
- // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
-
- // Reserve at least 4 image bindings on the fragment stage.
- bindings[4].image =
- Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
-
- // This is guaranteed to be at least 1.
- const u32 total_extracted_images = num_images / (NumStages - 1);
-
- // Reserve the other image bindings.
- for (std::size_t i = 0; i < NumStages; ++i) {
- const std::size_t stage = stage_swizzle[i];
- if (stage == 4) {
- continue;
- }
- bindings[stage].image =
- Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
- }
-
- // Compute doesn't care about any of this.
- bindings[5] = {0, 0, 0, 0};
-
- return bindings;
-}
-
bool IsASTCSupported() {
- static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
- static constexpr std::array formats = {
+ static constexpr std::array targets{
+ GL_TEXTURE_2D,
+ GL_TEXTURE_2D_ARRAY,
+ };
+ static constexpr std::array formats{
GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
@@ -172,11 +87,10 @@ bool IsASTCSupported() {
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
};
- static constexpr std::array required_support = {
+ static constexpr std::array required_support{
GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
};
-
for (const GLenum target : targets) {
for (const GLenum format : formats) {
for (const GLenum support : required_support) {
@@ -223,14 +137,13 @@ Device::Device() {
"Beta driver 443.24 is known to have issues. There might be performance issues.");
disable_fast_buffer_sub_data = true;
}
-
max_uniform_buffers = BuildMaxUniformBuffers();
- base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
+ max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
GLAD_GL_NV_shader_thread_shuffle;
has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -243,18 +156,30 @@ Device::Device() {
has_precise_bug = TestPreciseBug();
has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
+ has_derivative_control = GLAD_GL_ARB_derivative_control;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
+ has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
+ has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
+ has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
+ has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
+ has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
+ warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
+ need_fastmath_off = is_nvidia;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
- use_assembly_shaders = Settings::values.use_assembly_shaders.GetValue() &&
+ shader_backend = Settings::values.shader_backend.GetValue();
+ use_assembly_shaders = shader_backend == Settings::ShaderBackend::GLASM &&
GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
-
+ if (shader_backend == Settings::ShaderBackend::GLASM && !use_assembly_shaders) {
+ LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
+ shader_backend = Settings::ShaderBackend::GLSL;
+ }
// Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue() &&
!(is_amd || (is_intel && !is_linux));
@@ -265,11 +190,6 @@ Device::Device() {
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
LOG_INFO(Render_OpenGL, "Renderer_BrokenTextureViewFormats: {}",
has_broken_texture_view_formats);
-
- if (Settings::values.use_assembly_shaders.GetValue() && !use_assembly_shaders) {
- LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
- }
-
if (Settings::values.use_asynchronous_shaders.GetValue() && !use_asynchronous_shaders) {
LOG_WARNING(Render_OpenGL, "Asynchronous shader compilation enabled but not supported");
}
@@ -325,22 +245,6 @@ std::string Device::GetVendorName() const {
return vendor_name;
}
-Device::Device(std::nullptr_t) {
- max_uniform_buffers.fill(std::numeric_limits<u32>::max());
- uniform_buffer_alignment = 4;
- shader_storage_alignment = 4;
- max_vertex_attributes = 16;
- max_varyings = 15;
- max_compute_shared_memory_size = 0x10000;
- has_warp_intrinsics = true;
- has_shader_ballot = true;
- has_vertex_viewport_layer = true;
- has_image_load_formatted = true;
- has_texture_shadow_lod = true;
- has_variable_aoffi = true;
- has_depth_buffer_float = true;
-}
-
bool Device::TestVariableAoffi() {
return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2c2b13767..ee992aed4 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,34 +6,22 @@
#include <cstddef>
#include "common/common_types.h"
-#include "video_core/engines/shader_type.h"
+#include "shader_recompiler/stage.h"
+
+namespace Settings {
+enum class ShaderBackend : u32;
+};
namespace OpenGL {
class Device {
public:
- struct BaseBindings {
- u32 uniform_buffer{};
- u32 shader_storage_buffer{};
- u32 sampler{};
- u32 image{};
- };
-
explicit Device();
- explicit Device(std::nullptr_t);
[[nodiscard]] std::string GetVendorName() const;
- u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
- return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
- }
-
- const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
- return base_bindings[stage_index];
- }
-
- const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
- return GetBaseBindings(static_cast<std::size_t>(shader_type));
+ u32 GetMaxUniformBuffers(Shader::Stage stage) const noexcept {
+ return max_uniform_buffers[static_cast<size_t>(stage)];
}
size_t GetUniformBufferAlignment() const {
@@ -56,6 +44,10 @@ public:
return max_compute_shared_memory_size;
}
+ u32 GetMaxGLASMStorageBufferBlocks() const {
+ return max_glasm_storage_buffer_blocks;
+ }
+
bool HasWarpIntrinsics() const {
return has_warp_intrinsics;
}
@@ -108,6 +100,10 @@ public:
return has_nv_viewport_array2;
}
+ bool HasDerivativeControl() const {
+ return has_derivative_control;
+ }
+
bool HasDebuggingToolAttached() const {
return has_debugging_tool_attached;
}
@@ -128,18 +124,52 @@ public:
return has_depth_buffer_float;
}
+ bool HasGeometryShaderPassthrough() const {
+ return has_geometry_shader_passthrough;
+ }
+
+ bool HasNvGpuShader5() const {
+ return has_nv_gpu_shader_5;
+ }
+
+ bool HasShaderInt64() const {
+ return has_shader_int64;
+ }
+
+ bool HasAmdShaderHalfFloat() const {
+ return has_amd_shader_half_float;
+ }
+
+ bool HasSparseTexture2() const {
+ return has_sparse_texture_2;
+ }
+
+ bool IsWarpSizePotentiallyLargerThanGuest() const {
+ return warp_size_potentially_larger_than_guest;
+ }
+
+ bool NeedsFastmathOff() const {
+ return need_fastmath_off;
+ }
+
+ Settings::ShaderBackend GetShaderBackend() const {
+ return shader_backend;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
- std::string vendor_name;
- std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
- std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
+ std::array<u32, Shader::MaxStageTypes> max_uniform_buffers{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
u32 max_compute_shared_memory_size{};
+ u32 max_glasm_storage_buffer_blocks{};
+
+ Settings::ShaderBackend shader_backend{};
+
bool has_warp_intrinsics{};
bool has_shader_ballot{};
bool has_vertex_viewport_layer{};
@@ -153,11 +183,21 @@ private:
bool has_broken_texture_view_formats{};
bool has_fast_buffer_sub_data{};
bool has_nv_viewport_array2{};
+ bool has_derivative_control{};
bool has_debugging_tool_attached{};
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
bool has_depth_buffer_float{};
+ bool has_geometry_shader_passthrough{};
+ bool has_nv_gpu_shader_5{};
+ bool has_shader_int64{};
+ bool has_amd_shader_half_float{};
+ bool has_sparse_texture_2{};
+ bool warp_size_potentially_larger_than_guest{};
+ bool need_fastmath_off{};
+
+ std::string vendor_name;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
new file mode 100644
index 000000000..fac0034fb
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -0,0 +1,572 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <string>
+#include <vector>
+
+#include "common/settings.h" // for enum class Settings::ShaderBackend
+#include "common/thread_worker.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
+#include "video_core/shader_notify.h"
+#include "video_core/texture_cache/texture_cache.h"
+
+#if defined(_MSC_VER) && defined(NDEBUG)
+#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
+#else
+#define LAMBDA_FORCEINLINE
+#endif
+
+namespace OpenGL {
+namespace {
+using Shader::ImageBufferDescriptor;
+using Shader::ImageDescriptor;
+using Shader::TextureBufferDescriptor;
+using Shader::TextureDescriptor;
+using Tegra::Texture::TexturePair;
+using VideoCommon::ImageId;
+
+constexpr u32 MAX_TEXTURES = 64;
+constexpr u32 MAX_IMAGES = 8;
+
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+ u32 num{};
+ for (const auto& desc : range) {
+ num += desc.count;
+ }
+ return num;
+}
+
+GLenum Stage(size_t stage_index) {
+ switch (stage_index) {
+ case 0:
+ return GL_VERTEX_SHADER;
+ case 1:
+ return GL_TESS_CONTROL_SHADER;
+ case 2:
+ return GL_TESS_EVALUATION_SHADER;
+ case 3:
+ return GL_GEOMETRY_SHADER;
+ case 4:
+ return GL_FRAGMENT_SHADER;
+ }
+ UNREACHABLE_MSG("{}", stage_index);
+ return GL_NONE;
+}
+
+GLenum AssemblyStage(size_t stage_index) {
+ switch (stage_index) {
+ case 0:
+ return GL_VERTEX_PROGRAM_NV;
+ case 1:
+ return GL_TESS_CONTROL_PROGRAM_NV;
+ case 2:
+ return GL_TESS_EVALUATION_PROGRAM_NV;
+ case 3:
+ return GL_GEOMETRY_PROGRAM_NV;
+ case 4:
+ return GL_FRAGMENT_PROGRAM_NV;
+ }
+ UNREACHABLE_MSG("{}", stage_index);
+ return GL_NONE;
+}
+
+/// Translates hardware transform feedback indices
+/// @param location Hardware location
+/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
+/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
+std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
+ const u8 index = location / 4;
+ if (index >= 8 && index <= 39) {
+ return {GL_GENERIC_ATTRIB_NV, index - 8};
+ }
+ if (index >= 48 && index <= 55) {
+ return {GL_TEXTURE_COORD_NV, index - 48};
+ }
+ switch (index) {
+ case 7:
+ return {GL_POSITION, 0};
+ case 40:
+ return {GL_PRIMARY_COLOR_NV, 0};
+ case 41:
+ return {GL_SECONDARY_COLOR_NV, 0};
+ case 42:
+ return {GL_BACK_PRIMARY_COLOR_NV, 0};
+ case 43:
+ return {GL_BACK_SECONDARY_COLOR_NV, 0};
+ }
+ UNIMPLEMENTED_MSG("index={}", index);
+ return {GL_POSITION, 0};
+}
+
+template <typename Spec>
+bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
+ for (size_t stage = 0; stage < stage_infos.size(); ++stage) {
+ if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) {
+ return false;
+ }
+ const auto& info{stage_infos[stage]};
+ if constexpr (!Spec::has_storage_buffers) {
+ if (!info.storage_buffers_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_texture_buffers) {
+ if (!info.texture_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_image_buffers) {
+ if (!info.image_buffer_descriptors.empty()) {
+ return false;
+ }
+ }
+ if constexpr (!Spec::has_images) {
+ if (!info.image_descriptors.empty()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
+
+template <typename Spec, typename... Specs>
+ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) {
+ if constexpr (sizeof...(Specs) > 0) {
+ if (!Passes<Spec>(stage_infos, enabled_mask)) {
+ return FindSpec<Specs...>(stage_infos, enabled_mask);
+ }
+ }
+ return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
+}
+
+struct SimpleVertexFragmentSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct SimpleVertexSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
+ static constexpr bool has_storage_buffers = false;
+ static constexpr bool has_texture_buffers = false;
+ static constexpr bool has_image_buffers = false;
+ static constexpr bool has_images = false;
+};
+
+struct DefaultSpec {
+ static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
+ static constexpr bool has_storage_buffers = true;
+ static constexpr bool has_texture_buffers = true;
+ static constexpr bool has_image_buffers = true;
+ static constexpr bool has_images = true;
+};
+
+ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) {
+ return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask);
+}
+} // Anonymous namespace
+
+GraphicsPipeline::GraphicsPipeline(
+ const Device& device, TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_, ShaderWorker* thread_worker,
+ VideoCore::ShaderNotify* shader_notify, std::array<std::string, 5> sources,
+ std::array<std::vector<u32>, 5> sources_spirv, const std::array<const Shader::Info*, 5>& infos,
+ const GraphicsPipelineKey& key_)
+ : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
+ gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
+ state_tracker{state_tracker_}, key{key_} {
+ if (shader_notify) {
+ shader_notify->MarkShaderBuilding();
+ }
+ u32 num_textures{};
+ u32 num_images{};
+ u32 num_storage_buffers{};
+ for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
+ auto& info{stage_infos[stage]};
+ if (infos[stage]) {
+ info = *infos[stage];
+ enabled_stages_mask |= 1u << stage;
+ }
+ if (stage < 4) {
+ base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
+ base_storage_bindings[stage + 1] = base_storage_bindings[stage];
+
+ base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
+ base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
+ }
+ enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
+ std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
+
+ const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
+ num_texture_buffers[stage] += num_tex_buffer_bindings;
+ num_textures += num_tex_buffer_bindings;
+
+ const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
+ num_image_buffers[stage] += num_img_buffers_bindings;
+ num_images += num_img_buffers_bindings;
+
+ num_textures += AccumulateCount(info.texture_descriptors);
+ num_images += AccumulateCount(info.image_descriptors);
+ num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+
+ writes_global_memory |= std::ranges::any_of(
+ info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
+ }
+ ASSERT(num_textures <= MAX_TEXTURES);
+ ASSERT(num_images <= MAX_IMAGES);
+
+ const bool assembly_shaders{assembly_programs[0].handle != 0};
+ use_storage_buffers =
+ !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+ writes_global_memory &= !use_storage_buffers;
+ configure_func = ConfigureFunc(stage_infos, enabled_stages_mask);
+
+ if (key.xfb_enabled && device.UseAssemblyShaders()) {
+ GenerateTransformFeedbackState();
+ }
+ const bool in_parallel = thread_worker != nullptr;
+ const auto backend = device.GetShaderBackend();
+ auto func{[this, sources = std::move(sources), sources_spirv = std::move(sources_spirv),
+ shader_notify, backend, in_parallel](ShaderContext::Context*) mutable {
+ for (size_t stage = 0; stage < 5; ++stage) {
+ switch (backend) {
+ case Settings::ShaderBackend::GLSL:
+ if (!sources[stage].empty()) {
+ source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
+ }
+ break;
+ case Settings::ShaderBackend::GLASM:
+ if (!sources[stage].empty()) {
+ assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
+ if (in_parallel) {
+ // Make sure program is built before continuing when building in parallel
+ glGetString(GL_PROGRAM_ERROR_STRING_NV);
+ }
+ }
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ if (!sources_spirv[stage].empty()) {
+ source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
+ }
+ break;
+ }
+ }
+ if (in_parallel && backend != Settings::ShaderBackend::GLASM) {
+ // Make sure programs have built if we are building shaders in parallel
+ for (OGLProgram& program : source_programs) {
+ if (program.handle != 0) {
+ GLint status{};
+ glGetProgramiv(program.handle, GL_LINK_STATUS, &status);
+ }
+ }
+ }
+ if (shader_notify) {
+ shader_notify->MarkShaderComplete();
+ }
+ is_built = true;
+ built_condvar.notify_one();
+ }};
+ if (thread_worker) {
+ thread_worker->QueueWork(std::move(func));
+ } else {
+ func(nullptr);
+ }
+}
+
+template <typename Spec>
+void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
+ std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
+ std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
+ std::array<GLuint, MAX_TEXTURES> samplers;
+ size_t image_view_index{};
+ GLsizei sampler_binding{};
+
+ texture_cache.SynchronizeGraphicsDescriptors();
+
+ buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
+ buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
+ buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
+ buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
+
+ const auto& regs{maxwell3d.regs};
+ const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
+ const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsStorageBuffers(stage);
+ if constexpr (Spec::has_storage_buffers) {
+ size_t ssbo_index{};
+ for (const auto& desc : info.storage_buffers_descriptors) {
+ ASSERT(desc.count == 1);
+ buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
+ desc.cbuf_offset, desc.is_written);
+ ++ssbo_index;
+ }
+ }
+ const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
+ const auto read_handle{[&](const auto& desc, u32 index) {
+ ASSERT(cbufs[desc.cbuf_index].enabled);
+ const u32 index_offset{index << desc.size_shift};
+ const u32 offset{desc.cbuf_offset + index_offset};
+ const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
+ if constexpr (std::is_same_v<decltype(desc), const TextureDescriptor&> ||
+ std::is_same_v<decltype(desc), const TextureBufferDescriptor&>) {
+ if (desc.has_secondary) {
+ ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
+ const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
+ const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
+ second_offset};
+ const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
+ const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
+ const u32 raw{lhs_raw | rhs_raw};
+ return TexturePair(raw, via_header_index);
+ }
+ }
+ return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
+ }};
+ const auto add_image{[&](const auto& desc) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ }
+ }};
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+ samplers[sampler_binding++] = 0;
+ }
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_image(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ const auto handle{read_handle(desc, index)};
+ image_view_indices[image_view_index++] = handle.first;
+
+ Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
+ samplers[sampler_binding++] = sampler->Handle();
+ }
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ add_image(desc);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ config_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ config_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ config_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ config_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ config_stage(4);
+ }
+ const std::span indices_span(image_view_indices.data(), image_view_index);
+ texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
+
+ texture_cache.UpdateRenderTargets(false);
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+
+ ImageId* texture_buffer_index{image_view_ids.data()};
+ const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
+ size_t index{};
+ const auto add_buffer{[&](const auto& desc) {
+ constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
+ for (u32 i = 0; i < desc.count; ++i) {
+ bool is_written{false};
+ if constexpr (is_image) {
+ is_written = desc.is_written;
+ }
+ ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
+ buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
+ image_view.BufferSize(), image_view.format,
+ is_written, is_image);
+ ++index;
+ ++texture_buffer_index;
+ }
+ }};
+ const Shader::Info& info{stage_infos[stage]};
+ buffer_cache.UnbindGraphicsTextureBuffers(stage);
+
+ if constexpr (Spec::has_texture_buffers) {
+ for (const auto& desc : info.texture_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ if constexpr (Spec::has_image_buffers) {
+ for (const auto& desc : info.image_buffer_descriptors) {
+ add_buffer(desc);
+ }
+ }
+ for (const auto& desc : info.texture_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ if constexpr (Spec::has_images) {
+ for (const auto& desc : info.image_descriptors) {
+ texture_buffer_index += desc.count;
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ bind_stage_info(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ bind_stage_info(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ bind_stage_info(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ bind_stage_info(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ bind_stage_info(4);
+ }
+ buffer_cache.UpdateGraphicsBuffers(is_indexed);
+ buffer_cache.BindHostGeometryBuffers(is_indexed);
+
+ if (!is_built.load(std::memory_order::relaxed)) {
+ WaitForBuild();
+ }
+ if (assembly_programs[0].handle != 0) {
+ program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
+ } else {
+ program_manager.BindSourcePrograms(source_programs);
+ }
+ const ImageId* views_it{image_view_ids.data()};
+ GLsizei texture_binding = 0;
+ GLsizei image_binding = 0;
+ std::array<GLuint, MAX_TEXTURES> textures;
+ std::array<GLuint, MAX_IMAGES> images;
+ const auto prepare_stage{[&](size_t stage) {
+ buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
+ buffer_cache.BindHostStageBuffers(stage);
+
+ texture_binding += num_texture_buffers[stage];
+ image_binding += num_image_buffers[stage];
+
+ views_it += num_texture_buffers[stage];
+ views_it += num_image_buffers[stage];
+
+ const auto& info{stage_infos[stage]};
+ for (const auto& desc : info.texture_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ textures[texture_binding++] = image_view.Handle(desc.type);
+ }
+ }
+ for (const auto& desc : info.image_descriptors) {
+ for (u32 index = 0; index < desc.count; ++index) {
+ ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
+ if (desc.is_written) {
+ texture_cache.MarkModification(image_view.image_id);
+ }
+ images[image_binding++] = image_view.StorageView(desc.type, desc.format);
+ }
+ }
+ }};
+ if constexpr (Spec::enabled_stages[0]) {
+ prepare_stage(0);
+ }
+ if constexpr (Spec::enabled_stages[1]) {
+ prepare_stage(1);
+ }
+ if constexpr (Spec::enabled_stages[2]) {
+ prepare_stage(2);
+ }
+ if constexpr (Spec::enabled_stages[3]) {
+ prepare_stage(3);
+ }
+ if constexpr (Spec::enabled_stages[4]) {
+ prepare_stage(4);
+ }
+ if (texture_binding != 0) {
+ ASSERT(texture_binding == sampler_binding);
+ glBindTextures(0, texture_binding, textures.data());
+ glBindSamplers(0, sampler_binding, samplers.data());
+ }
+ if (image_binding != 0) {
+ glBindImageTextures(0, image_binding, images.data());
+ }
+}
+
+void GraphicsPipeline::ConfigureTransformFeedbackImpl() const {
+ glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides,
+ xfb_streams.data(), GL_INTERLEAVED_ATTRIBS);
+}
+
+void GraphicsPipeline::GenerateTransformFeedbackState() {
+ // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
+ // when this is required.
+ GLint* cursor{xfb_attribs.data()};
+ GLint* current_stream{xfb_streams.data()};
+
+ for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
+ const auto& layout = key.xfb_state.layouts[feedback];
+ UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
+ if (layout.varying_count == 0) {
+ continue;
+ }
+ *current_stream = static_cast<GLint>(feedback);
+ if (current_stream != xfb_streams.data()) {
+ // When stepping one stream, push the expected token
+ cursor[0] = GL_NEXT_BUFFER_NV;
+ cursor[1] = 0;
+ cursor[2] = 0;
+ cursor += XFB_ENTRY_STRIDE;
+ }
+ ++current_stream;
+
+ const auto& locations = key.xfb_state.varyings[feedback];
+ std::optional<u8> current_index;
+ for (u32 offset = 0; offset < layout.varying_count; ++offset) {
+ const u8 location = locations[offset];
+ const u8 index = location / 4;
+
+ if (current_index == index) {
+ // Increase number of components of the previous attachment
+ ++cursor[-2];
+ continue;
+ }
+ current_index = index;
+
+ std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
+ cursor[1] = 1;
+ cursor += XFB_ENTRY_STRIDE;
+ }
+ }
+ num_xfb_attribs = static_cast<GLsizei>((cursor - xfb_attribs.data()) / XFB_ENTRY_STRIDE);
+ num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data());
+}
+
+void GraphicsPipeline::WaitForBuild() {
+ std::unique_lock lock{built_mutex};
+ built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
new file mode 100644
index 000000000..4e28d9a42
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -0,0 +1,169 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstring>
+#include <type_traits>
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/transform_feedback.h"
+
+namespace OpenGL {
+
+namespace ShaderContext {
+struct Context;
+}
+
+class Device;
+class ProgramManager;
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
+
+struct GraphicsPipelineKey {
+ std::array<u64, 6> unique_hashes;
+ union {
+ u32 raw;
+ BitField<0, 1, u32> xfb_enabled;
+ BitField<1, 1, u32> early_z;
+ BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+ BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
+ BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
+ BitField<10, 1, u32> tessellation_clockwise;
+ };
+ std::array<u32, 3> padding;
+ VideoCommon::TransformFeedbackState xfb_state;
+
+ size_t Hash() const noexcept {
+ return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
+ }
+
+ bool operator==(const GraphicsPipelineKey& rhs) const noexcept {
+ return std::memcmp(this, &rhs, Size()) == 0;
+ }
+
+ bool operator!=(const GraphicsPipelineKey& rhs) const noexcept {
+ return !operator==(rhs);
+ }
+
+ [[nodiscard]] size_t Size() const noexcept {
+ if (xfb_enabled != 0) {
+ return sizeof(GraphicsPipelineKey);
+ } else {
+ return offsetof(GraphicsPipelineKey, padding);
+ }
+ }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
+
+class GraphicsPipeline {
+public:
+ explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
+ std::array<std::string, 5> sources,
+ std::array<std::vector<u32>, 5> sources_spirv,
+ const std::array<const Shader::Info*, 5>& infos,
+ const GraphicsPipelineKey& key_);
+
+ void Configure(bool is_indexed) {
+ configure_func(this, is_indexed);
+ }
+
+ void ConfigureTransformFeedback() const {
+ if (num_xfb_attribs != 0) {
+ ConfigureTransformFeedbackImpl();
+ }
+ }
+
+ [[nodiscard]] const GraphicsPipelineKey& Key() const noexcept {
+ return key;
+ }
+
+ [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+ return writes_global_memory;
+ }
+
+ [[nodiscard]] bool IsBuilt() const noexcept {
+ return is_built.load(std::memory_order::relaxed);
+ }
+
+ template <typename Spec>
+ static auto MakeConfigureSpecFunc() {
+ return [](GraphicsPipeline* pipeline, bool is_indexed) {
+ pipeline->ConfigureImpl<Spec>(is_indexed);
+ };
+ }
+
+private:
+ template <typename Spec>
+ void ConfigureImpl(bool is_indexed);
+
+ void ConfigureTransformFeedbackImpl() const;
+
+ void GenerateTransformFeedbackState();
+
+ void WaitForBuild();
+
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+ const GraphicsPipelineKey key;
+
+ void (*configure_func)(GraphicsPipeline*, bool){};
+
+ std::array<OGLProgram, 5> source_programs;
+ std::array<OGLAssemblyProgram, 5> assembly_programs;
+ u32 enabled_stages_mask{};
+
+ std::array<Shader::Info, 5> stage_infos{};
+ std::array<u32, 5> enabled_uniform_buffer_masks{};
+ VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
+ std::array<u32, 5> base_uniform_bindings{};
+ std::array<u32, 5> base_storage_bindings{};
+ std::array<u32, 5> num_texture_buffers{};
+ std::array<u32, 5> num_image_buffers{};
+
+ bool use_storage_buffers{};
+ bool writes_global_memory{};
+
+ static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
+ GLsizei num_xfb_attribs{};
+ GLsizei num_xfb_strides{};
+ std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{};
+ std::array<GLint, Maxwell::NumTransformFeedbackBuffers> xfb_streams{};
+
+ std::mutex built_mutex;
+ std::condition_variable built_condvar;
+ std::atomic_bool is_built{false};
+};
+
+} // namespace OpenGL
+
+namespace std {
+template <>
+struct hash<OpenGL::GraphicsPipelineKey> {
+ size_t operator()(const OpenGL::GraphicsPipelineKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..41d2b73f4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -23,7 +23,6 @@
#include "core/memory.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
@@ -40,7 +39,6 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using GLvec4 = std::array<GLfloat, 4>;
-using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
@@ -51,112 +49,11 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
-
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
-struct TextureHandle {
- constexpr TextureHandle(u32 data, bool via_header_index) {
- const Tegra::Texture::TextureHandle handle{data};
- image = handle.tic_id;
- sampler = via_header_index ? image : handle.tsc_id.Value();
- }
-
- u32 image;
- u32 sampler;
-};
-
-template <typename Engine, typename Entry>
-TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
- ShaderType shader_type, size_t index = 0) {
- if constexpr (std::is_same_v<Entry, SamplerEntry>) {
- if (entry.is_separated) {
- const u32 buffer_1 = entry.buffer;
- const u32 buffer_2 = entry.secondary_buffer;
- const u32 offset_1 = entry.offset;
- const u32 offset_2 = entry.secondary_offset;
- const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
- const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
- return TextureHandle(handle_1 | handle_2, via_header_index);
- }
- }
- if (entry.is_bindless) {
- const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
- return TextureHandle(raw, via_header_index);
- }
- const u32 buffer = engine.GetBoundBuffer();
- const u64 offset = (entry.offset + index) * sizeof(u32);
- return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
-}
-
-/// Translates hardware transform feedback indices
-/// @param location Hardware location
-/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
-/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
-std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
- const u8 index = location / 4;
- if (index >= 8 && index <= 39) {
- return {GL_GENERIC_ATTRIB_NV, index - 8};
- }
- if (index >= 48 && index <= 55) {
- return {GL_TEXTURE_COORD_NV, index - 48};
- }
- switch (index) {
- case 7:
- return {GL_POSITION, 0};
- case 40:
- return {GL_PRIMARY_COLOR_NV, 0};
- case 41:
- return {GL_SECONDARY_COLOR_NV, 0};
- case 42:
- return {GL_BACK_PRIMARY_COLOR_NV, 0};
- case 43:
- return {GL_BACK_SECONDARY_COLOR_NV, 0};
- }
- UNIMPLEMENTED_MSG("index={}", index);
- return {GL_POSITION, 0};
-}
-
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
-
-ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
- if (entry.is_buffer) {
- return ImageViewType::Buffer;
- }
- switch (entry.type) {
- case Tegra::Shader::TextureType::Texture1D:
- return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
- case Tegra::Shader::TextureType::Texture2D:
- return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
- case Tegra::Shader::TextureType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::TextureType::TextureCube:
- return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
-ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
- switch (entry.type) {
- case Tegra::Shader::ImageType::Texture1D:
- return ImageViewType::e1D;
- case Tegra::Shader::ImageType::Texture1DArray:
- return ImageViewType::e1DArray;
- case Tegra::Shader::ImageType::Texture2D:
- return ImageViewType::e2D;
- case Tegra::Shader::ImageType::Texture2DArray:
- return ImageViewType::e2DArray;
- case Tegra::Shader::ImageType::Texture3D:
- return ImageViewType::e3D;
- case Tegra::Shader::ImageType::TextureBuffer:
- return ImageViewType::Buffer;
- }
- UNREACHABLE();
- return ImageViewType::e2D;
-}
-
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -170,14 +67,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
- query_cache(*this, maxwell3d, gpu_memory),
- fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
- async_shaders(emu_window_) {
- if (device.UseAsynchronousShaders()) {
- async_shaders.AllocateWorkers();
- }
-}
+ shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
+ buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
+ query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -204,7 +97,7 @@ void RasterizerOpenGL::SyncVertexFormats() {
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
- if (attrib.IsConstant()) {
+ if (attrib.constant) {
glDisableVertexAttribArray(gl_index);
continue;
}
@@ -244,116 +137,9 @@ void RasterizerOpenGL::SyncVertexInstances() {
}
}
-void RasterizerOpenGL::SetupShaders(bool is_indexed) {
- u32 clip_distances = 0;
-
- std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeGraphicsDescriptors();
-
- for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = maxwell3d.regs.shader_config[index];
- const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-
- // Skip stages that are not enabled
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
- switch (program) {
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(0);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(0);
- break;
- default:
- break;
- }
- continue;
- }
- // Currently this stages are not supported in the OpenGL backend.
- // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
- if (program == Maxwell::ShaderProgram::TesselationControl ||
- program == Maxwell::ShaderProgram::TesselationEval) {
- continue;
- }
-
- Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
- const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
- switch (program) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- program_manager.UseVertexShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Geometry:
- program_manager.UseGeometryShader(program_handle);
- break;
- case Maxwell::ShaderProgram::Fragment:
- program_manager.UseFragmentShader(program_handle);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
- shader_config.enable.Value(), shader_config.offset);
- break;
- }
-
- // Stage indices are 0 - 5
- const size_t stage = index == 0 ? 0 : index - 1;
- shaders[stage] = shader;
-
- SetupDrawTextures(shader, stage);
- SetupDrawImages(shader, stage);
-
- buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
-
- buffer_cache.UnbindGraphicsStorageBuffers(stage);
- u32 ssbo_index = 0;
- for (const auto& buffer : shader->GetEntries().global_memory_entries) {
- buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
- buffer.cbuf_offset, buffer.is_written);
- ++ssbo_index;
- }
-
- // Workaround for Intel drivers.
- // When a clip distance is enabled but not set in the shader it crops parts of the screen
- // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
- // clip distances only when it's written by a shader stage.
- clip_distances |= shader->GetEntries().clip_distances;
-
- // When VertexA is enabled, we have dual vertex shaders
- if (program == Maxwell::ShaderProgram::VertexA) {
- // VertexB was combined with VertexA, so we skip the VertexB iteration
- ++index;
- }
- }
- SyncClipEnabled(clip_distances);
- maxwell3d.dirty.flags[Dirty::Shaders] = false;
-
- buffer_cache.UpdateGraphicsBuffers(is_indexed);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
-
- buffer_cache.BindHostGeometryBuffers(is_indexed);
-
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
- const Shader* const shader = shaders[stage];
- if (!shader) {
- continue;
- }
- buffer_cache.BindHostStageBuffers(stage);
- const auto& base = device.GetBaseBindings(stage);
- BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
- texture_index, image_index);
- }
-}
-
void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(title_id, stop_loading, callback);
+ shader_cache.LoadDiskResources(title_id, stop_loading, callback);
}
void RasterizerOpenGL::Clear() {
@@ -432,16 +218,15 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncState();
- // Setup shaders and their used resources.
+ GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
+ if (!pipeline) {
+ return;
+ }
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- SetupShaders(is_indexed);
-
- texture_cache.UpdateRenderTargets(false);
- state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
- program_manager.BindGraphicsPipeline();
+ pipeline->Configure(is_indexed);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
- BeginTransformFeedback(primitive_mode);
+ BeginTransformFeedback(pipeline, primitive_mode);
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
@@ -480,35 +265,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
-
EndTransformFeedback();
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
gpu.TickWork();
}
-void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
- Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
-
- std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
- BindComputeTextures(kernel);
-
- const auto& entries = kernel->GetEntries();
- buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
- buffer_cache.UnbindComputeStorageBuffers();
- u32 ssbo_index = 0;
- for (const auto& buffer : entries.global_memory_entries) {
- buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
- buffer.is_written);
- ++ssbo_index;
- }
- buffer_cache.UpdateComputeBuffers();
- buffer_cache.BindHostComputeBuffers();
-
- const auto& launch_desc = kepler_compute.launch_description;
- glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
+void RasterizerOpenGL::DispatchCompute() {
+ ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
+ if (!pipeline) {
+ return;
+ }
+ pipeline->Configure();
+ const auto& qmd{kepler_compute.launch_description};
+ glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
+ has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -611,6 +385,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
shader_cache.OnCPUWrite(addr, size);
}
+void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+ {
+ std::scoped_lock lock{texture_cache.mutex};
+ texture_cache.UnmapGPUMemory(addr, size);
+ }
+}
+
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory.Write<u32>(addr, value);
@@ -627,6 +408,13 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
fence_manager.SignalSyncPoint(value);
}
+void RasterizerOpenGL::SignalReference() {
+ if (!gpu.IsAsync()) {
+ return;
+ }
+ fence_manager.SignalOrdering();
+}
+
void RasterizerOpenGL::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
@@ -643,10 +431,11 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
void RasterizerOpenGL::WaitForIdle() {
glMemoryBarrier(GL_ALL_BARRIER_BITS);
+ SignalReference();
}
void RasterizerOpenGL::FragmentBarrier() {
- glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
+ glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}
void RasterizerOpenGL::TiledCacheBarrier() {
@@ -659,6 +448,13 @@ void RasterizerOpenGL::FlushCommands() {
return;
}
num_queued_commands = 0;
+
+ // Make sure memory stored from the previous GL command stream is visible
+ // This is only needed on assembly shaders where we write to GPU memory with raw pointers
+ if (has_written_global_memory) {
+ has_written_global_memory = false;
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+ }
glFlush();
}
@@ -686,6 +482,10 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf
return true;
}
+Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
+ return accelerate_dma;
+}
+
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) {
@@ -702,111 +502,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
- screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
+ screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}
-void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
- image_view_indices.clear();
- sampler_handles.clear();
-
- texture_cache.SynchronizeComputeDescriptors();
-
- SetupComputeTextures(kernel);
- SetupComputeImages(kernel);
-
- const std::span indices_span(image_view_indices.data(), image_view_indices.size());
- texture_cache.FillComputeImageViews(indices_span, image_view_ids);
-
- program_manager.BindCompute(kernel->GetHandle());
- size_t image_view_index = 0;
- size_t texture_index = 0;
- size_t image_index = 0;
- BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
-}
-
-void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
- GLuint base_image, size_t& image_view_index,
- size_t& texture_index, size_t& image_index) {
- const GLuint* const samplers = sampler_handles.data() + texture_index;
- const GLuint* const textures = texture_handles.data() + texture_index;
- const GLuint* const images = image_handles.data() + image_index;
-
- const size_t num_samplers = entries.samplers.size();
- for (const auto& sampler : entries.samplers) {
- for (size_t i = 0; i < sampler.size; ++i) {
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
- texture_handles[texture_index++] = handle;
- }
- }
- const size_t num_images = entries.images.size();
- for (size_t unit = 0; unit < num_images; ++unit) {
- // TODO: Mark as modified
- const ImageViewId image_view_id = image_view_ids[image_view_index++];
- const ImageView& image_view = texture_cache.GetImageView(image_view_id);
- const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
- image_handles[image_index] = handle;
- ++image_index;
- }
- if (num_samplers > 0) {
- glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
- glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
- }
- if (num_images > 0) {
- glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
- }
-}
-
-void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().samplers) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- for (size_t index = 0; index < entry.size; ++index) {
- const auto handle =
- GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
- const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : kernel->GetEntries().samplers) {
- for (size_t i = 0; i < entry.size; ++i) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
- const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
- sampler_handles.push_back(sampler->Handle());
- image_view_indices.push_back(handle.image);
- }
- }
-}
-
-void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
- const bool via_header_index =
- maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
- for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<ShaderType>(stage_index);
- const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
- image_view_indices.push_back(handle.image);
- }
-}
-
-void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
- const bool via_header_index = kepler_compute.launch_description.linked_tsc;
- for (const auto& entry : shader->GetEntries().images) {
- const auto handle =
- GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
- image_view_indices.push_back(handle.image);
- }
-}
-
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
@@ -922,7 +622,7 @@ void RasterizerOpenGL::SyncDepthClamp() {
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
auto& flags = maxwell3d.dirty.flags;
- if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
+ if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
@@ -1299,68 +999,13 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}
-void RasterizerOpenGL::SyncTransformFeedback() {
- // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
- // when this is required.
- const auto& regs = maxwell3d.regs;
-
- static constexpr std::size_t STRIDE = 3;
- std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
- std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
-
- GLint* cursor = attribs.data();
- GLint* current_stream = streams.data();
-
- for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
- const auto& layout = regs.tfb_layouts[feedback];
- UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
- if (layout.varying_count == 0) {
- continue;
- }
-
- *current_stream = static_cast<GLint>(feedback);
- if (current_stream != streams.data()) {
- // When stepping one stream, push the expected token
- cursor[0] = GL_NEXT_BUFFER_NV;
- cursor[1] = 0;
- cursor[2] = 0;
- cursor += STRIDE;
- }
- ++current_stream;
-
- const auto& locations = regs.tfb_varying_locs[feedback];
- std::optional<u8> current_index;
- for (u32 offset = 0; offset < layout.varying_count; ++offset) {
- const u8 location = locations[offset];
- const u8 index = location / 4;
-
- if (current_index == index) {
- // Increase number of components of the previous attachment
- ++cursor[-2];
- continue;
- }
- current_index = index;
-
- std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
- cursor[1] = 1;
- cursor += STRIDE;
- }
- }
-
- const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
- const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
- glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
- GL_INTERLEAVED_ATTRIBS);
-}
-
-void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
+void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
- if (device.UseAssemblyShaders()) {
- SyncTransformFeedback();
- }
+ program->ConfigureTransformFeedback();
+
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1374,11 +1019,21 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
}
void RasterizerOpenGL::EndTransformFeedback() {
- const auto& regs = maxwell3d.regs;
- if (regs.tfb_enabled == 0) {
- return;
+ if (maxwell3d.regs.tfb_enabled != 0) {
+ glEndTransformFeedback();
}
- glEndTransformFeedback();
+}
+
+AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {}
+
+bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMACopy(src_address, dest_address, amount);
+}
+
+bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
+ std::scoped_lock lock{buffer_cache.mutex};
+ return buffer_cache.DMAClear(src_address, amount, value);
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,6 +19,7 @@
#include "common/common_types.h"
#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -27,11 +28,9 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
namespace Core::Memory {
@@ -58,6 +57,18 @@ struct BindlessSSBO {
};
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
+class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface {
+public:
+ explicit AccelerateDMA(BufferCache& buffer_cache);
+
+ bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override;
+
+ bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override;
+
+private:
+ BufferCache& buffer_cache;
+};
+
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -68,7 +79,7 @@ public:
void Draw(bool is_indexed, bool is_instanced) override;
void Clear() override;
- void DispatchCompute(GPUVAddr code_addr) override;
+ void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -80,8 +91,10 @@ public:
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void UnmapMemory(VAddr addr, u64 size) override;
+ void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalSyncPoint(u32 value) override;
+ void SignalReference() override;
void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitForIdle() override;
@@ -92,6 +105,7 @@ public:
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) override;
+ Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
@@ -102,36 +116,11 @@ public:
return num_queued_commands > 0;
}
- VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
- return async_shaders;
- }
-
- const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
- return async_shaders;
- }
-
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
- void BindComputeTextures(Shader* kernel);
-
- void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
- size_t& image_view_index, size_t& texture_index, size_t& image_index);
-
- /// Configures the current textures to use for the draw command.
- void SetupDrawTextures(const Shader* shader, size_t stage_index);
-
- /// Configures the textures used in a compute shader.
- void SetupComputeTextures(const Shader* kernel);
-
- /// Configures images in a graphics shader.
- void SetupDrawImages(const Shader* shader, size_t stage_index);
-
- /// Configures images in a compute shader.
- void SetupComputeImages(const Shader* shader);
-
/// Syncs state to match guest's
void SyncState();
@@ -204,18 +193,12 @@ private:
/// Syncs vertex instances to match the guest state
void SyncVertexInstances();
- /// Syncs transform feedback state to match guest state
- /// @note Only valid on assembly shaders
- void SyncTransformFeedback();
-
/// Begin a transform feedback
- void BeginTransformFeedback(GLenum primitive_mode);
+ void BeginTransformFeedback(GraphicsPipeline* pipeline, GLenum primitive_mode);
/// End a transform feedback
void EndTransformFeedback();
- void SetupShaders(bool is_indexed);
-
Tegra::GPU& gpu;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
@@ -230,12 +213,11 @@ private:
TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime;
BufferCache buffer_cache;
- ShaderCacheOpenGL shader_cache;
+ ShaderCache shader_cache;
QueryCache query_cache;
+ AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
- VideoCommon::Shader::AsyncShaders async_shaders;
-
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
@@ -243,7 +225,8 @@ private:
std::array<GLuint, MAX_IMAGES> image_handles{};
/// Number of commands queued to the OpenGL driver. Resetted on flush.
- std::size_t num_queued_commands = 0;
+ size_t num_queued_commands = 0;
+ bool has_written_global_memory = false;
u32 last_clip_distance_mask = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 3428e5e21..8695c29e3 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -83,18 +83,6 @@ void OGLSampler::Release() {
handle = 0;
}
-void OGLShader::Create(std::string_view source, GLenum type) {
- if (handle != 0) {
- return;
- }
- if (source.empty()) {
- return;
- }
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- handle = GLShader::LoadShader(source, type);
-}
-
void OGLShader::Release() {
if (handle == 0)
return;
@@ -104,21 +92,6 @@ void OGLShader::Release() {
handle = 0;
}
-void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
- const char* frag_shader, bool separable_program,
- bool hint_retrievable) {
- OGLShader vert, geo, frag;
- if (vert_shader)
- vert.Create(vert_shader, GL_VERTEX_SHADER);
- if (geo_shader)
- geo.Create(geo_shader, GL_GEOMETRY_SHADER);
- if (frag_shader)
- frag.Create(frag_shader, GL_FRAGMENT_SHADER);
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
-}
-
void OGLProgram::Release() {
if (handle == 0)
return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 552d79db4..b2d5bfd3b 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -8,7 +8,6 @@
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_shader_util.h"
namespace OpenGL {
@@ -128,8 +127,6 @@ public:
return *this;
}
- void Create(std::string_view source, GLenum type);
-
void Release();
GLuint handle = 0;
@@ -151,17 +148,6 @@ public:
return *this;
}
- template <typename... T>
- void Create(bool separable_program, bool hint_retrievable, T... shaders) {
- if (handle != 0)
- return;
- handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
- }
-
- /// Creates a new internal OpenGL resource and stores the handle
- void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
- bool separable_program = false, bool hint_retrievable = false);
-
/// Deletes the internal OpenGL resource
void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5a01c59ec..1f4dda17e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,606 +3,543 @@
// Refer to the license.txt file included.
#include <atomic>
+#include <fstream>
#include <functional>
#include <mutex>
-#include <optional>
#include <string>
#include <thread>
-#include <unordered_set>
#include "common/alignment.h"
#include "common/assert.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "common/thread_worker.h"
#include "core/core.h"
-#include "core/frontend/emu_window.h"
+#include "shader_recompiler/backend/glasm/emit_glasm.h"
+#include "shader_recompiler/backend/glsl/emit_glsl.h"
+#include "shader_recompiler/backend/spirv/emit_spirv.h"
+#include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/frontend/maxwell/translate_program.h"
+#include "shader_recompiler/profile.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
#include "video_core/memory_manager.h"
-#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
-#include "video_core/shader/memory_util.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
namespace OpenGL {
-
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::GetShaderAddress;
-using VideoCommon::Shader::GetShaderCode;
-using VideoCommon::Shader::GetUniqueIdentifier;
-using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::Registry;
-using VideoCommon::Shader::ShaderIR;
-using VideoCommon::Shader::STAGE_MAIN_OFFSET;
-
namespace {
-
-constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
-
-/// Gets the shader type from a Maxwell program type
-constexpr GLenum GetGLShaderType(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_SHADER;
- case ShaderType::Geometry:
- return GL_GEOMETRY_SHADER;
- case ShaderType::Fragment:
- return GL_FRAGMENT_SHADER;
- case ShaderType::Compute:
- return GL_COMPUTE_SHADER;
- default:
- return GL_NONE;
- }
+using Shader::Backend::GLASM::EmitGLASM;
+using Shader::Backend::GLSL::EmitGLSL;
+using Shader::Backend::SPIRV::EmitSPIRV;
+using Shader::Maxwell::MergeDualVertexPrograms;
+using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
+using VideoCommon::LoadPipelines;
+using VideoCommon::SerializePipeline;
+using Context = ShaderContext::Context;
+
+constexpr u32 CACHE_VERSION = 5;
+
+template <typename Container>
+auto MakeSpan(Container& container) {
+ return std::span(container.data(), container.size());
}
-constexpr const char* GetShaderTypeName(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return "VS";
- case ShaderType::TesselationControl:
- return "HS";
- case ShaderType::TesselationEval:
- return "DS";
- case ShaderType::Geometry:
- return "GS";
- case ShaderType::Fragment:
- return "FS";
- case ShaderType::Compute:
- return "CS";
- }
- return "UNK";
+Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
+ const Shader::IR::Program& program,
+ const Shader::IR::Program* previous_program,
+ bool glasm_use_storage_buffers, bool use_assembly_shaders) {
+ Shader::RuntimeInfo info;
+ if (previous_program) {
+ info.previous_stage_stores = previous_program->info.stores;
+ } else {
+ // Mark all stores as available for vertex shaders
+ info.previous_stage_stores.mask.set();
+ }
+ switch (program.stage) {
+ case Shader::Stage::VertexB:
+ case Shader::Stage::Geometry:
+ if (!use_assembly_shaders && key.xfb_enabled != 0) {
+ info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+ }
+ break;
+ case Shader::Stage::TessellationEval:
+ info.tess_clockwise = key.tessellation_clockwise != 0;
+ info.tess_primitive = [&key] {
+ switch (key.tessellation_primitive) {
+ case Maxwell::TessellationPrimitive::Isolines:
+ return Shader::TessPrimitive::Isolines;
+ case Maxwell::TessellationPrimitive::Triangles:
+ return Shader::TessPrimitive::Triangles;
+ case Maxwell::TessellationPrimitive::Quads:
+ return Shader::TessPrimitive::Quads;
+ }
+ UNREACHABLE();
+ return Shader::TessPrimitive::Triangles;
+ }();
+ info.tess_spacing = [&] {
+ switch (key.tessellation_spacing) {
+ case Maxwell::TessellationSpacing::Equal:
+ return Shader::TessSpacing::Equal;
+ case Maxwell::TessellationSpacing::FractionalOdd:
+ return Shader::TessSpacing::FractionalOdd;
+ case Maxwell::TessellationSpacing::FractionalEven:
+ return Shader::TessSpacing::FractionalEven;
+ }
+ UNREACHABLE();
+ return Shader::TessSpacing::Equal;
+ }();
+ break;
+ case Shader::Stage::Fragment:
+ info.force_early_z = key.early_z != 0;
+ break;
+ default:
+ break;
+ }
+ switch (key.gs_input_topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ info.input_topology = Shader::InputTopology::Points;
+ break;
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineLoop:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ info.input_topology = Shader::InputTopology::Lines;
+ break;
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ case Maxwell::PrimitiveTopology::Quads:
+ case Maxwell::PrimitiveTopology::QuadStrip:
+ case Maxwell::PrimitiveTopology::Polygon:
+ case Maxwell::PrimitiveTopology::Patches:
+ info.input_topology = Shader::InputTopology::Triangles;
+ break;
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ info.input_topology = Shader::InputTopology::LinesAdjacency;
+ break;
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ info.input_topology = Shader::InputTopology::TrianglesAdjacency;
+ break;
+ }
+ info.glasm_use_storage_buffers = glasm_use_storage_buffers;
+ return info;
}
-constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
- switch (program_type) {
- case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB:
- return ShaderType::Vertex;
- case Maxwell::ShaderProgram::TesselationControl:
- return ShaderType::TesselationControl;
- case Maxwell::ShaderProgram::TesselationEval:
- return ShaderType::TesselationEval;
- case Maxwell::ShaderProgram::Geometry:
- return ShaderType::Geometry;
- case Maxwell::ShaderProgram::Fragment:
- return ShaderType::Fragment;
- }
- return {};
+void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
+ std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
+ return VideoCommon::TransformFeedbackState::Layout{
+ .stream = layout.stream,
+ .varying_count = layout.varying_count,
+ .stride = layout.stride,
+ };
+ });
+ state.varyings = regs.tfb_varying_locs;
}
+} // Anonymous namespace
-constexpr GLenum AssemblyEnum(ShaderType shader_type) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GL_VERTEX_PROGRAM_NV;
- case ShaderType::TesselationControl:
- return GL_TESS_CONTROL_PROGRAM_NV;
- case ShaderType::TesselationEval:
- return GL_TESS_EVALUATION_PROGRAM_NV;
- case ShaderType::Geometry:
- return GL_GEOMETRY_PROGRAM_NV;
- case ShaderType::Fragment:
- return GL_FRAGMENT_PROGRAM_NV;
- case ShaderType::Compute:
- return GL_COMPUTE_PROGRAM_NV;
+ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ VideoCore::ShaderNotify& shader_notify_)
+ : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+ emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
+ buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
+ shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
+ profile{
+ .supported_spirv = 0x00010000,
+
+ .unified_descriptor_binding = false,
+ .support_descriptor_aliasing = false,
+ .support_int8 = false,
+ .support_int16 = false,
+ .support_int64 = device.HasShaderInt64(),
+ .support_vertex_instance_id = true,
+ .support_float_controls = false,
+ .support_separate_denorm_behavior = false,
+ .support_separate_rounding_mode = false,
+ .support_fp16_denorm_preserve = false,
+ .support_fp32_denorm_preserve = false,
+ .support_fp16_denorm_flush = false,
+ .support_fp32_denorm_flush = false,
+ .support_fp16_signed_zero_nan_preserve = false,
+ .support_fp32_signed_zero_nan_preserve = false,
+ .support_fp64_signed_zero_nan_preserve = false,
+ .support_explicit_workgroup_layout = false,
+ .support_vote = true,
+ .support_viewport_index_layer_non_geometry =
+ device.HasNvViewportArray2() || device.HasVertexViewportLayer(),
+ .support_viewport_mask = device.HasNvViewportArray2(),
+ .support_typeless_image_loads = device.HasImageLoadFormatted(),
+ .support_demote_to_helper_invocation = false,
+ .support_int64_atomics = false,
+ .support_derivative_control = device.HasDerivativeControl(),
+ .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
+ .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
+ .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
+ .support_gl_texture_shadow_lod = device.HasTextureShadowLod(),
+ .support_gl_warp_intrinsics = false,
+ .support_gl_variable_aoffi = device.HasVariableAoffi(),
+ .support_gl_sparse_textures = device.HasSparseTexture2(),
+ .support_gl_derivative_control = device.HasDerivativeControl(),
+
+ .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
+
+ .lower_left_origin_mode = true,
+ .need_declared_frag_colors = true,
+ .need_fastmath_off = device.NeedsFastmathOff(),
+
+ .has_broken_spirv_clamp = true,
+ .has_broken_unsigned_image_offsets = true,
+ .has_broken_signed_operations = true,
+ .has_broken_fp16_float_controls = false,
+ .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
+ .has_gl_precise_bug = device.HasPreciseBug(),
+ .ignore_nan_fp_comparisons = true,
+ .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
+ },
+ host_info{
+ .support_float16 = false,
+ .support_int64 = device.HasShaderInt64(),
+ } {
+ if (use_asynchronous_shaders) {
+ workers = CreateWorkers();
}
- return {};
}
-std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
- return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
-}
+ShaderCache::~ShaderCache() = default;
-std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
- const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
- const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
- entry.graphics_info, entry.compute_info};
- auto registry = std::make_shared<Registry>(entry.type, info);
- for (const auto& [address, value] : entry.keys) {
- const auto [buffer, offset] = address;
- registry->InsertKey(buffer, offset, value);
- }
- for (const auto& [offset, sampler] : entry.bound_samplers) {
- registry->InsertBoundSampler(offset, sampler);
+void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback) {
+ if (title_id == 0) {
+ return;
}
- for (const auto& [key, sampler] : entry.bindless_samplers) {
- const auto [buffer, offset] = key;
- registry->InsertBindlessSampler(buffer, offset, sampler);
+ const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+ const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
+ if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
+ LOG_ERROR(Common_Filesystem, "Failed to create shader cache directories");
+ return;
}
- return registry;
-}
-
-std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats;
- glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
+ shader_cache_filename = base_dir / "opengl.bin";
+
+ if (!workers) {
+ workers = CreateWorkers();
+ }
+ struct {
+ std::mutex mutex;
+ size_t total{};
+ size_t built{};
+ bool has_loaded{};
+ } state;
+
+ const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+ ComputePipelineKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+ workers->QueueWork(
+ [this, key, env = std::move(env), &state, &callback](Context* ctx) mutable {
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(ctx->pools, key, env)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ compute_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+ GraphicsPipelineKey key;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key));
+ workers->QueueWork(
+ [this, key, envs = std::move(envs), &state, &callback](Context* ctx) mutable {
+ boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+ for (auto& env : envs) {
+ env_ptrs.push_back(&env);
+ }
+ ctx->pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
+ std::lock_guard lock{state.mutex};
+ if (pipeline) {
+ graphics_cache.emplace(key, std::move(pipeline));
+ }
+ ++state.built;
+ if (state.has_loaded) {
+ callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+ }
+ });
+ ++state.total;
+ }};
+ LoadPipelines(stop_loading, shader_cache_filename, CACHE_VERSION, load_compute, load_graphics);
- std::vector<GLint> formats(num_formats);
- glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
+ std::unique_lock lock{state.mutex};
+ callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+ state.has_loaded = true;
+ lock.unlock();
- std::unordered_set<GLenum> supported_formats;
- for (const GLint format : formats) {
- supported_formats.insert(static_cast<GLenum>(format));
+ workers->WaitForRequests();
+ if (!use_asynchronous_shaders) {
+ workers.reset();
}
- return supported_formats;
}
-} // Anonymous namespace
-
-ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
- const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
- if (device.UseDriverCache()) {
- // Ignore hint retrievable if we are using the driver cache
- hint_retrievable = false;
- }
- const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
- LOG_INFO(Render_OpenGL, "{}", shader_id);
-
- auto program = std::make_shared<ProgramHandle>();
-
- if (device.UseAssemblyShaders()) {
- const std::string arb =
- DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
-
- GLuint& arb_prog = program->assembly_program.handle;
-
-// Commented out functions signal OpenGL errors but are compatible with apitrace.
-// Use them only to capture and replay on apitrace.
-#if 0
- glGenProgramsNV(1, &arb_prog);
- glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
- reinterpret_cast<const GLubyte*>(arb.data()));
-#else
- glGenProgramsARB(1, &arb_prog);
- glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
- static_cast<GLsizei>(arb.size()), arb.data());
-#endif
- const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
- if (err && *err) {
- LOG_CRITICAL(Render_OpenGL, "{}", err);
- LOG_INFO(Render_OpenGL, "\n{}", arb);
- }
- } else {
- const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
- OGLShader shader;
- shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
-
- program->source_program.Create(true, hint_retrievable, shader.handle);
- }
-
- return program;
+GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
+ if (!RefreshStages(graphics_key.unique_hashes)) {
+ current_pipeline = nullptr;
+ return nullptr;
+ }
+ const auto& regs{maxwell3d.regs};
+ graphics_key.raw = 0;
+ graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
+ graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
+ ? regs.draw.topology.Value()
+ : Maxwell::PrimitiveTopology{});
+ graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
+ graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
+ graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
+ graphics_key.xfb_enabled.Assign(regs.tfb_enabled != 0 ? 1 : 0);
+ if (graphics_key.xfb_enabled) {
+ SetXfbState(graphics_key.xfb_state, regs);
+ }
+ if (current_pipeline && graphics_key == current_pipeline->Key()) {
+ return BuiltPipeline(current_pipeline);
+ }
+ return CurrentGraphicsPipelineSlowPath();
}
-Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
- ProgramSharedPtr program_, bool is_built_)
- : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
- is_built{is_built_} {
- handle = program->assembly_program.handle;
- if (handle == 0) {
- handle = program->source_program.handle;
+GraphicsPipeline* ShaderCache::CurrentGraphicsPipelineSlowPath() {
+ const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
+ auto& pipeline{pair->second};
+ if (is_new) {
+ pipeline = CreateGraphicsPipeline();
}
- if (is_built) {
- ASSERT(handle != 0);
+ if (!pipeline) {
+ return nullptr;
}
+ current_pipeline = pipeline.get();
+ return BuiltPipeline(current_pipeline);
}
-Shader::~Shader() = default;
-
-GLuint Shader::GetHandle() const {
- DEBUG_ASSERT(registry->IsConsistent());
- return handle;
-}
-
-bool Shader::IsBuilt() const {
- return is_built;
-}
-
-void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
- program->source_program = std::move(new_program);
- handle = program->source_program.handle;
- is_built = true;
-}
-
-void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
- program->assembly_program = std::move(new_program);
- handle = program->assembly_program.handle;
- is_built = true;
-}
-
-std::unique_ptr<Shader> Shader::CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
- ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
- const auto shader_type = GetShaderType(program_type);
-
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
- if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (!code_b.empty()) {
- // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
- // }
- auto program =
- BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
- ShaderDiskCacheEntry entry;
- entry.type = shader_type;
- entry.code = std::move(code);
- entry.code_b = std::move(code_b);
- entry.unique_identifier = params.unique_identifier;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.graphics_info = registry->GetGraphicsInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, shader_type),
- std::move(program), true));
- } else {
- // Required for entries
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- auto entries = MakeEntries(params.device, ir, shader_type);
-
- async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
- std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
- COMPILER_SETTINGS, *registry, cpu_addr);
-
- auto program = std::make_shared<ProgramHandle>();
- return std::unique_ptr<Shader>(
- new Shader(std::move(registry), std::move(entries), std::move(program), false));
+GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
+ if (pipeline->IsBuilt()) {
+ return pipeline;
}
-}
-
-std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code) {
- auto& gpu = params.gpu;
- gpu.ShaderNotify().MarkSharderBuilding();
-
- auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
- const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
- const u64 uid = params.unique_identifier;
- auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
-
- ShaderDiskCacheEntry entry;
- entry.type = ShaderType::Compute;
- entry.code = std::move(code);
- entry.unique_identifier = uid;
- entry.bound_buffer = registry->GetBoundBuffer();
- entry.compute_info = registry->GetComputeInfo();
- entry.keys = registry->GetKeys();
- entry.bound_samplers = registry->GetBoundSamplers();
- entry.bindless_samplers = registry->GetBindlessSamplers();
- params.disk_cache.SaveEntry(std::move(entry));
-
- gpu.ShaderNotify().MarkShaderComplete();
-
- return std::unique_ptr<Shader>(new Shader(std::move(registry),
- MakeEntries(params.device, ir, ShaderType::Compute),
- std::move(program)));
-}
-
-std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader) {
- return std::unique_ptr<Shader>(new Shader(
- precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
-}
-
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
- Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_)
- : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
- maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-
-void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback) {
- disk_cache.BindTitleID(title_id);
- const std::optional transferable = disk_cache.LoadTransferable();
-
- LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
- transferable.has_value() ? transferable->size() : 0);
-
- if (!transferable) {
- return;
+ if (!use_asynchronous_shaders) {
+ return pipeline;
}
-
- std::vector<ShaderDiskCachePrecompiled> gl_cache;
- if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
- // Only load precompiled cache when we are not using assembly shaders
- gl_cache = disk_cache.LoadPrecompiled();
+ // If something is using depth, we can assume that games are not rendering anything which
+ // will be used one time.
+ if (maxwell3d.regs.zeta_enable) {
+ return nullptr;
}
- const auto supported_formats = GetSupportedFormats();
-
- // Track if precompiled cache was altered during loading to know if we have to
- // serialize the virtual precompiled cache file back to the hard drive
- bool precompiled_cache_altered = false;
-
- // Inform the frontend about shader build initialization
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
+ // If games are using a small index count, we can assume these are full screen quads.
+ // Usually these shaders are only used once for building textures so we can assume they
+ // can't be built async
+ if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ return pipeline;
}
+ return nullptr;
+}
- std::mutex mutex;
- std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
- std::atomic_bool gl_cache_failed = false;
-
- const auto find_precompiled = [&gl_cache](u64 id) {
- return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
- };
-
- const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
- std::size_t end) {
- const auto scope = context->Acquire();
-
- for (std::size_t i = begin; i < end; ++i) {
- if (stop_loading.stop_requested()) {
- return;
- }
- const auto& entry = (*transferable)[i];
- const u64 uid = entry.unique_identifier;
- const auto it = find_precompiled(uid);
- const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
-
- const bool is_compute = entry.type == ShaderType::Compute;
- const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- auto registry = MakeRegistry(entry);
- const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
-
- ProgramSharedPtr program;
- if (precompiled_entry) {
- // If the shader is precompiled, attempt to load it with
- program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
- if (!program) {
- gl_cache_failed = true;
- }
- }
- if (!program) {
- // Otherwise compile it from GLSL
- program = BuildShader(device, entry.type, uid, ir, *registry, true);
- }
-
- PrecompiledShader shader;
- shader.program = std::move(program);
- shader.registry = std::move(registry);
- shader.entries = MakeEntries(device, ir, entry.type);
-
- std::scoped_lock lock{mutex};
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
- transferable->size());
- }
- runtime_cache.emplace(entry.unique_identifier, std::move(shader));
- }
- };
-
- const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
- const std::size_t bucket_size{transferable->size() / num_workers};
- std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
- std::vector<std::thread> threads(num_workers);
- for (std::size_t i = 0; i < num_workers; ++i) {
- const bool is_last_worker = i + 1 == num_workers;
- const std::size_t start{bucket_size * i};
- const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
-
- // On some platforms the shared context has to be created from the GUI thread
- contexts[i] = emu_window.CreateSharedContext();
- threads[i] = std::thread(worker, contexts[i].get(), start, end);
+ComputePipeline* ShaderCache::CurrentComputePipeline() {
+ const VideoCommon::ShaderInfo* const shader{ComputeShader()};
+ if (!shader) {
+ return nullptr;
}
- for (auto& thread : threads) {
- thread.join();
+ const auto& qmd{kepler_compute.launch_description};
+ const ComputePipelineKey key{
+ .unique_hash = shader->unique_hash,
+ .shared_memory_size = qmd.shared_alloc,
+ .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
+ };
+ const auto [pair, is_new]{compute_cache.try_emplace(key)};
+ auto& pipeline{pair->second};
+ if (!is_new) {
+ return pipeline.get();
}
+ pipeline = CreateComputePipeline(key, shader);
+ return pipeline.get();
+}
- if (gl_cache_failed) {
- // Invalidate the precompiled cache if a shader dumped shader was rejected
- disk_cache.InvalidatePrecompiled();
- precompiled_cache_altered = true;
- return;
- }
- if (stop_loading.stop_requested()) {
- return;
- }
+std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
+ GraphicsEnvironments environments;
+ GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
- if (device.UseAssemblyShaders() || device.UseDriverCache()) {
- // Don't store precompiled binaries for assembly shaders or when using the driver cache
- return;
+ main_pools.ReleaseContents();
+ auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(),
+ use_asynchronous_shaders)};
+ if (!pipeline || shader_cache_filename.empty()) {
+ return pipeline;
}
-
- // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
- // before precompiling them
-
- for (std::size_t i = 0; i < transferable->size(); ++i) {
- const u64 id = (*transferable)[i].unique_identifier;
- const auto it = find_precompiled(id);
- if (it == gl_cache.end()) {
- const GLuint program = runtime_cache.at(id).program->source_program.handle;
- disk_cache.SavePrecompiled(id, program);
- precompiled_cache_altered = true;
+ boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs;
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (graphics_key.unique_hashes[index] != 0) {
+ env_ptrs.push_back(&environments.envs[index]);
}
}
-
- if (precompiled_cache_altered) {
- disk_cache.SaveVirtualPrecompiledFile();
- }
+ SerializePipeline(graphics_key, env_ptrs, shader_cache_filename, CACHE_VERSION);
+ return pipeline;
}
-ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats) {
- if (!supported_formats.contains(precompiled_entry.binary_format)) {
- LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
- return {};
- }
-
- auto program = std::make_shared<ProgramHandle>();
- GLuint& handle = program->source_program.handle;
- handle = glCreateProgram();
- glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
- static_cast<GLsizei>(precompiled_entry.binary.size()));
-
- GLint link_status;
- glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
- if (link_status == GL_FALSE) {
- LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
- return {};
- }
-
- return program;
-}
-
-Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders) {
- if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
- auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
- if (last_shader->IsBuilt()) {
- return last_shader;
+std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
+ ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+ size_t env_index{};
+ u32 total_storage_buffers{};
+ std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
+ const bool uses_vertex_a{key.unique_hashes[0] != 0};
+ const bool uses_vertex_b{key.unique_hashes[1] != 0};
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
}
- }
+ Shader::Environment& env{*envs[env_index]};
+ ++env_index;
- const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
+ const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+ Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ if (!uses_vertex_a || index != 1) {
+ // Normal path
+ programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
- if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
- auto completed_work = async_shaders.GetCompletedWork();
- for (auto& work : completed_work) {
- Shader* shader = TryGet(work.cpu_address);
- gpu.ShaderNotify().MarkShaderComplete();
- if (shader == nullptr) {
- continue;
+ for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
+ total_storage_buffers += desc.count;
}
- using namespace VideoCommon::Shader;
- if (work.backend == AsyncShaders::Backend::OpenGL) {
- shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
- } else if (work.backend == AsyncShaders::Backend::GLASM) {
- shader->AsyncGLASMBuilt(std::move(work.program.glasm));
+ } else {
+ // VertexB path when VertexA is present.
+ auto& program_va{programs[0]};
+ auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+ for (const auto& desc : program_vb.info.storage_buffers_descriptors) {
+ total_storage_buffers += desc.count;
}
-
- auto& registry = shader->GetRegistry();
-
- ShaderDiskCacheEntry entry;
- entry.type = work.shader_type;
- entry.code = std::move(work.code);
- entry.code_b = std::move(work.code_b);
- entry.unique_identifier = work.uid;
- entry.bound_buffer = registry.GetBoundBuffer();
- entry.graphics_info = registry.GetGraphicsInfo();
- entry.keys = registry.GetKeys();
- entry.bound_samplers = registry.GetBoundSamplers();
- entry.bindless_samplers = registry.GetBindlessSamplers();
- disk_cache.SaveEntry(std::move(entry));
+ programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
}
+ const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
+ const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
- // Look up shader in the cache based on address
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
- if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
- return last_shaders[static_cast<std::size_t>(program)] = shader;
- }
-
- const u8* const host_ptr{gpu_memory.GetPointer(address)};
-
- // No shader found - create a new one
- ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
- ProgramCode code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
- code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
- }
- const std::size_t code_size = code.size() * sizeof(u64);
-
- const u64 unique_identifier = GetUniqueIdentifier(
- GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
-
- const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
-
- std::unique_ptr<Shader> shader;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
- async_shaders, cpu_addr.value_or(0));
- } else {
- shader = Shader::CreateFromCache(params, found->second);
- }
+ std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
- Shader* const result = shader.get();
- if (cpu_addr) {
- Register(std::move(shader), *cpu_addr, code_size);
- } else {
- null_shader = std::move(shader);
+ std::array<std::string, 5> sources;
+ std::array<std::vector<u32>, 5> sources_spirv;
+ Shader::Backend::Bindings binding;
+ Shader::IR::Program* previous_program{};
+ const bool use_glasm{device.UseAssemblyShaders()};
+ const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
+ for (size_t index = first_index; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
+ }
+ UNIMPLEMENTED_IF(index == 0);
+
+ Shader::IR::Program& program{programs[index]};
+ const size_t stage_index{index - 1};
+ infos[stage_index] = &program.info;
+
+ const auto runtime_info{
+ MakeRuntimeInfo(key, program, previous_program, glasm_use_storage_buffers, use_glasm)};
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ sources_spirv[stage_index] = EmitSPIRV(profile, runtime_info, program, binding);
+ break;
+ }
+ previous_program = &program;
}
+ auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
+ return std::make_unique<GraphicsPipeline>(
+ device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
+ thread_worker, &shader_notify, sources, sources_spirv, infos, key);
- return last_shaders[static_cast<std::size_t>(program)] = result;
+} catch (Shader::Exception& exception) {
+ LOG_ERROR(Render_OpenGL, "{}", exception.what());
+ return nullptr;
}
-Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
-
- if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
- return kernel;
- }
-
- // No kernel found, create a new one
- const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
- ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
- const std::size_t code_size{code.size() * sizeof(u64)};
- const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
-
- const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
+ const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
+ const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+ const auto& qmd{kepler_compute.launch_description};
+ ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ env.SetCachedSize(shader->size_bytes);
+
+ main_pools.ReleaseContents();
+ auto pipeline{CreateComputePipeline(main_pools, key, env)};
+ if (!pipeline || shader_cache_filename.empty()) {
+ return pipeline;
+ }
+ SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, shader_cache_filename,
+ CACHE_VERSION);
+ return pipeline;
+}
- std::unique_ptr<Shader> kernel;
- const auto found = runtime_cache.find(unique_identifier);
- if (found == runtime_cache.end()) {
- kernel = Shader::CreateKernelFromMemory(params, std::move(code));
- } else {
- kernel = Shader::CreateFromCache(params, found->second);
- }
+std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
+ ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
+ Shader::Environment& env) try {
+ LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
+
+ Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
+ auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
+
+ u32 num_storage_buffers{};
+ for (const auto& desc : program.info.storage_buffers_descriptors) {
+ num_storage_buffers += desc.count;
+ }
+ Shader::RuntimeInfo info;
+ info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+
+ std::string code{};
+ std::vector<u32> code_spirv;
+ switch (device.GetShaderBackend()) {
+ case Settings::ShaderBackend::GLSL:
+ code = EmitGLSL(profile, program);
+ break;
+ case Settings::ShaderBackend::GLASM:
+ code = EmitGLASM(profile, info, program);
+ break;
+ case Settings::ShaderBackend::SPIRV:
+ code_spirv = EmitSPIRV(profile, program);
+ break;
+ }
+
+ return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
+ kepler_compute, program_manager, program.info, code,
+ code_spirv);
+} catch (Shader::Exception& exception) {
+ LOG_ERROR(Render_OpenGL, "{}", exception.what());
+ return nullptr;
+}
- Shader* const result = kernel.get();
- if (cpu_addr) {
- Register(std::move(kernel), *cpu_addr, code_size);
- } else {
- null_kernel = std::move(kernel);
- }
- return result;
+std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
+ return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
+ "yuzu:ShaderBuilder",
+ [this] { return Context{emu_window}; });
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b30308b6f..a34110b37 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,157 +5,93 @@
#pragma once
#include <array>
-#include <atomic>
-#include <bitset>
-#include <memory>
-#include <string>
-#include <tuple>
+#include <filesystem>
+#include <stop_token>
#include <unordered_map>
-#include <unordered_set>
-#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
+#include "common/thread_worker.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
+#include "shader_recompiler/object_pool.h"
+#include "shader_recompiler/profile.h"
+#include "video_core/renderer_opengl/gl_compute_pipeline.h"
+#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
+#include "video_core/renderer_opengl/gl_shader_context.h"
#include "video_core/shader_cache.h"
namespace Tegra {
class MemoryManager;
}
-namespace Core::Frontend {
-class EmuWindow;
-}
-
-namespace VideoCommon::Shader {
-class AsyncShaders;
-}
-
namespace OpenGL {
class Device;
+class ProgramManager;
class RasterizerOpenGL;
+using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-struct ProgramHandle {
- OGLProgram source_program;
- OGLAssemblyProgram assembly_program;
-};
-using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
-
-struct PrecompiledShader {
- ProgramSharedPtr program;
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
-};
-
-struct ShaderParameters {
- Tegra::GPU& gpu;
- Tegra::Engines::ConstBufferEngineInterface& engine;
- ShaderDiskCacheOpenGL& disk_cache;
- const Device& device;
- VAddr cpu_addr;
- const u8* host_ptr;
- u64 unique_identifier;
-};
-
-ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
- u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- bool hint_retrievable = false);
-
-class Shader final {
+class ShaderCache : public VideoCommon::ShaderCache {
public:
- ~Shader();
-
- /// Gets the GL program handle for the shader
- GLuint GetHandle() const;
-
- bool IsBuilt() const;
-
- /// Gets the shader entries for the shader
- const ShaderEntries& GetEntries() const {
- return entries;
- }
-
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return *registry;
- }
-
- /// Mark a OpenGL shader as built
- void AsyncOpenGLBuilt(OGLProgram new_program);
+ explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_,
+ TextureCache& texture_cache_, BufferCache& buffer_cache_,
+ ProgramManager& program_manager_, StateTracker& state_tracker_,
+ VideoCore::ShaderNotify& shader_notify_);
+ ~ShaderCache();
- /// Mark a GLASM shader as built
- void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
+ void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+ const VideoCore::DiskResourceLoadCallback& callback);
- static std::unique_ptr<Shader> CreateStageFromMemory(
- const ShaderParameters& params, Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b,
- VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
+ [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
- static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
- ProgramCode code);
-
- static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
- const PrecompiledShader& precompiled_shader);
+ [[nodiscard]] ComputePipeline* CurrentComputePipeline();
private:
- explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
- ProgramSharedPtr program, bool is_built_ = true);
-
- std::shared_ptr<VideoCommon::Shader::Registry> registry;
- ShaderEntries entries;
- ProgramSharedPtr program;
- GLuint handle = 0;
- bool is_built{};
-};
+ GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
-public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
- Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_);
- ~ShaderCacheOpenGL() override;
+ [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
- /// Loads disk cache for the current game
- void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
- /// Gets the current specified shader stage program
- Shader* GetStageProgram(Maxwell::ShaderProgram program,
- VideoCommon::Shader::AsyncShaders& async_shaders);
+ std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
+ ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
+ std::span<Shader::Environment* const> envs, bool build_in_parallel);
- /// Gets a compute kernel in the passed address
- Shader* GetComputeKernel(GPUVAddr code_addr);
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
+ const VideoCommon::ShaderInfo* shader);
-private:
- ProgramSharedPtr GeneratePrecompiledProgram(
- const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
- const std::unordered_set<GLenum>& supported_formats);
+ std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
+ const ComputePipelineKey& key,
+ Shader::Environment& env);
+
+ std::unique_ptr<ShaderWorker> CreateWorkers() const;
Core::Frontend::EmuWindow& emu_window;
- Tegra::GPU& gpu;
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
+ TextureCache& texture_cache;
+ BufferCache& buffer_cache;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
+ VideoCore::ShaderNotify& shader_notify;
+ const bool use_asynchronous_shaders;
+
+ GraphicsPipelineKey graphics_key{};
+ GraphicsPipeline* current_pipeline{};
- ShaderDiskCacheOpenGL disk_cache;
- std::unordered_map<u64, PrecompiledShader> runtime_cache;
+ ShaderContext::ShaderPools main_pools;
+ std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
+ std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
- std::unique_ptr<Shader> null_shader;
- std::unique_ptr<Shader> null_kernel;
+ Shader::Profile profile;
+ Shader::HostTranslateInfo host_info;
- std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
+ std::filesystem::path shader_cache_filename;
+ std::unique_ptr<ShaderWorker> workers;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h
new file mode 100644
index 000000000..6ff34e5d6
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_context.h
@@ -0,0 +1,33 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/frontend/emu_window.h"
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+
+namespace OpenGL::ShaderContext {
+struct ShaderPools {
+ void ReleaseContents() {
+ flow_block.ReleaseContents();
+ block.ReleaseContents();
+ inst.ReleaseContents();
+ }
+
+ Shader::ObjectPool<Shader::IR::Inst> inst;
+ Shader::ObjectPool<Shader::IR::Block> block;
+ Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
+};
+
+struct Context {
+ explicit Context(Core::Frontend::EmuWindow& emu_window)
+ : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
+
+ std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
+ Core::Frontend::GraphicsContext::Scoped scoped;
+ ShaderPools pools;
+};
+
+} // namespace OpenGL::ShaderContext
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
deleted file mode 100644
index 9c28498e8..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ /dev/null
@@ -1,2986 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <variant>
-#include <vector>
-
-#include <fmt/format.h>
-
-#include "common/alignment.h"
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/div_ceil.h"
-#include "common/logging/log.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_rasterizer.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/ast.h"
-#include "video_core/shader/node.h"
-#include "video_core/shader/shader_ir.h"
-#include "video_core/shader/transform_feedback.h"
-
-namespace OpenGL {
-
-namespace {
-
-using Tegra::Engines::ShaderType;
-using Tegra::Shader::Attribute;
-using Tegra::Shader::Header;
-using Tegra::Shader::IpaInterpMode;
-using Tegra::Shader::IpaMode;
-using Tegra::Shader::IpaSampleMode;
-using Tegra::Shader::PixelImap;
-using Tegra::Shader::Register;
-using Tegra::Shader::TextureType;
-
-using namespace VideoCommon::Shader;
-using namespace std::string_literals;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using Operation = const OperationNode&;
-
-class ASTDecompiler;
-class ExprDecompiler;
-
-enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-
-constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
-
-constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
-constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
-
-struct TextureOffset {};
-struct TextureDerivates {};
-using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
-
-constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
-
-constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
- bvec2 is_nan1 = isnan(pair1);
- bvec2 is_nan2 = isnan(pair2);
- return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-)";
-
-class ShaderWriter final {
-public:
- void AddExpression(std::string_view text) {
- DEBUG_ASSERT(scope >= 0);
- if (!text.empty()) {
- AppendIndentation();
- }
- shader_source += text;
- }
-
- // Forwards all arguments directly to libfmt.
- // Note that all formatting requirements for fmt must be
- // obeyed when using this function. (e.g. {{ must be used
- // printing the character '{' is desirable. Ditto for }} and '}',
- // etc).
- template <typename... Args>
- void AddLine(std::string_view text, Args&&... args) {
- AddExpression(fmt::format(fmt::runtime(text), std::forward<Args>(args)...));
- AddNewLine();
- }
-
- void AddNewLine() {
- DEBUG_ASSERT(scope >= 0);
- shader_source += '\n';
- }
-
- std::string GenerateTemporary() {
- return fmt::format("tmp{}", temporary_index++);
- }
-
- std::string GetResult() {
- return std::move(shader_source);
- }
-
- s32 scope = 0;
-
-private:
- void AppendIndentation() {
- shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
- }
-
- std::string shader_source;
- u32 temporary_index = 1;
-};
-
-class Expression final {
-public:
- Expression(std::string code_, Type type_) : code{std::move(code_)}, type{type_} {
- ASSERT(type != Type::Void);
- }
- Expression() : type{Type::Void} {}
-
- Type GetType() const {
- return type;
- }
-
- std::string GetCode() const {
- return code;
- }
-
- void CheckVoid() const {
- ASSERT(type == Type::Void);
- }
-
- std::string As(Type type_) const {
- switch (type_) {
- case Type::Bool:
- return AsBool();
- case Type::Bool2:
- return AsBool2();
- case Type::Float:
- return AsFloat();
- case Type::Int:
- return AsInt();
- case Type::Uint:
- return AsUint();
- case Type::HalfFloat:
- return AsHalfFloat();
- default:
- UNREACHABLE_MSG("Invalid type");
- return code;
- }
- }
-
- std::string AsBool() const {
- switch (type) {
- case Type::Bool:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsBool2() const {
- switch (type) {
- case Type::Bool2:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsFloat() const {
- switch (type) {
- case Type::Float:
- return code;
- case Type::Uint:
- return fmt::format("utof({})", code);
- case Type::Int:
- return fmt::format("itof({})", code);
- case Type::HalfFloat:
- return fmt::format("utof(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsInt() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftoi({})", code);
- case Type::Uint:
- return fmt::format("int({})", code);
- case Type::Int:
- return code;
- case Type::HalfFloat:
- return fmt::format("int(packHalf2x16({}))", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsUint() const {
- switch (type) {
- case Type::Float:
- return fmt::format("ftou({})", code);
- case Type::Uint:
- return code;
- case Type::Int:
- return fmt::format("uint({})", code);
- case Type::HalfFloat:
- return fmt::format("packHalf2x16({})", code);
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
- std::string AsHalfFloat() const {
- switch (type) {
- case Type::Float:
- return fmt::format("unpackHalf2x16(ftou({}))", code);
- case Type::Uint:
- return fmt::format("unpackHalf2x16({})", code);
- case Type::Int:
- return fmt::format("unpackHalf2x16(int({}))", code);
- case Type::HalfFloat:
- return code;
- default:
- UNREACHABLE_MSG("Incompatible types");
- return code;
- }
- }
-
-private:
- std::string code;
- Type type{};
-};
-
-const char* GetTypeString(Type type) {
- switch (type) {
- case Type::Bool:
- return "bool";
- case Type::Bool2:
- return "bvec2";
- case Type::Float:
- return "float";
- case Type::Int:
- return "int";
- case Type::Uint:
- return "uint";
- case Type::HalfFloat:
- return "vec2";
- default:
- UNREACHABLE_MSG("Invalid type");
- return "<invalid type>";
- }
-}
-
-const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
- switch (image_type) {
- case Tegra::Shader::ImageType::Texture1D:
- return "1D";
- case Tegra::Shader::ImageType::TextureBuffer:
- return "Buffer";
- case Tegra::Shader::ImageType::Texture1DArray:
- return "1DArray";
- case Tegra::Shader::ImageType::Texture2D:
- return "2D";
- case Tegra::Shader::ImageType::Texture2DArray:
- return "2DArray";
- case Tegra::Shader::ImageType::Texture3D:
- return "3D";
- default:
- UNREACHABLE();
- return "1D";
- }
-}
-
-/// Describes primitive behavior on geometry shaders
-std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
- switch (topology) {
- case Maxwell::PrimitiveTopology::Points:
- return {"points", 1};
- case Maxwell::PrimitiveTopology::Lines:
- case Maxwell::PrimitiveTopology::LineStrip:
- return {"lines", 2};
- case Maxwell::PrimitiveTopology::LinesAdjacency:
- case Maxwell::PrimitiveTopology::LineStripAdjacency:
- return {"lines_adjacency", 4};
- case Maxwell::PrimitiveTopology::Triangles:
- case Maxwell::PrimitiveTopology::TriangleStrip:
- case Maxwell::PrimitiveTopology::TriangleFan:
- return {"triangles", 3};
- case Maxwell::PrimitiveTopology::TrianglesAdjacency:
- case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
- return {"triangles_adjacency", 6};
- default:
- UNIMPLEMENTED_MSG("topology={}", topology);
- return {"points", 1};
- }
-}
-
-/// Generates code to use for a swizzle operation.
-constexpr const char* GetSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
- return swizzle.at(element);
-}
-
-constexpr const char* GetColorSwizzle(std::size_t element) {
- constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
- return swizzle.at(element);
-}
-
-/// Translate topology
-std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
- switch (topology) {
- case Tegra::Shader::OutputTopology::PointList:
- return "points";
- case Tegra::Shader::OutputTopology::LineStrip:
- return "line_strip";
- case Tegra::Shader::OutputTopology::TriangleStrip:
- return "triangle_strip";
- default:
- UNIMPLEMENTED_MSG("Unknown output topology: {}", topology);
- return "points";
- }
-}
-
-/// Returns true if an object has to be treated as precise
-bool IsPrecise(Operation operand) {
- const auto& meta{operand.GetMeta()};
- if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
- return arithmetic->precise;
- }
- return false;
-}
-
-bool IsPrecise(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- return IsPrecise(*operation);
- }
- return false;
-}
-
-constexpr bool IsGenericAttribute(Attribute::Index index) {
- return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
-}
-
-constexpr bool IsLegacyTexCoord(Attribute::Index index) {
- return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
- static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
-}
-
-constexpr Attribute::Index ToGenericAttribute(u64 value) {
- return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
-}
-
-constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
- return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
-}
-
-u32 GetGenericAttributeIndex(Attribute::Index index) {
- ASSERT(IsGenericAttribute(index));
- return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
-}
-
-constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
- switch (stack) {
- case MetaStackClass::Ssy:
- return "ssy";
- case MetaStackClass::Pbk:
- return "pbk";
- }
- return {};
-}
-
-std::string FlowStackName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
-}
-
-std::string FlowStackTopName(MetaStackClass stack) {
- return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
-}
-
-struct GenericVaryingDescription {
- std::string name;
- u8 first_element = 0;
- bool is_scalar = false;
-};
-
-class GLSLDecompiler final {
-public:
- explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_,
- ShaderType stage_, std::string_view identifier_,
- std::string_view suffix_)
- : device{device_}, ir{ir_}, registry{registry_}, stage{stage_},
- identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} {
- if (stage != ShaderType::Compute) {
- transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
- }
- }
-
- void Decompile() {
- DeclareHeader();
- DeclareVertex();
- DeclareGeometry();
- DeclareFragment();
- DeclareCompute();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareImages();
- DeclareSamplers();
- DeclareGlobalMemory();
- DeclareConstantBuffers();
- DeclareLocalMemory();
- DeclareRegisters();
- DeclarePredicates();
- DeclareInternalFlags();
- DeclareCustomVariables();
- DeclarePhysicalAttributeReader();
-
- code.AddLine("void main() {{");
- ++code.scope;
-
- if (stage == ShaderType::Vertex) {
- code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
- }
-
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
- }
-
- --code.scope;
- code.AddLine("}}");
- }
-
- std::string GetResult() {
- return code.GetResult();
- }
-
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
- void DecompileBranchMode() {
- // VM's program counter
- const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = {}U;", first_address);
-
- // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
- // unlikely that shaders will use 20 nested SSYs and PBKs.
- constexpr u32 FLOW_STACK_SIZE = 20;
- if (!ir.IsFlowStackDisabled()) {
- for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
- code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
- }
- }
-
- code.AddLine("while (true) {{");
- ++code.scope;
-
- code.AddLine("switch (jmp_to) {{");
-
- for (const auto& pair : ir.GetBasicBlocks()) {
- const auto& [address, bb] = pair;
- code.AddLine("case 0x{:X}U: {{", address);
- ++code.scope;
-
- VisitBlock(bb);
-
- --code.scope;
- code.AddLine("}}");
- }
-
- code.AddLine("default: return;");
- code.AddLine("}}");
-
- --code.scope;
- code.AddLine("}}");
- }
-
- void DecompileAST();
-
- void DeclareHeader() {
- if (!identifier.empty()) {
- code.AddLine("// {}", identifier);
- }
- const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate();
- code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core");
- code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
- if (device.HasShaderBallot()) {
- code.AddLine("#extension GL_ARB_shader_ballot : require");
- }
- if (device.HasVertexViewportLayer()) {
- code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
- }
- if (device.HasImageLoadFormatted()) {
- code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
- }
- if (device.HasTextureShadowLod()) {
- code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
- }
- if (device.HasWarpIntrinsics()) {
- code.AddLine("#extension GL_NV_gpu_shader5 : require");
- code.AddLine("#extension GL_NV_shader_thread_group : require");
- code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
- }
- // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
- // operations) on places where we don't want to.
- // Thanks to Ryujinx for finding this workaround.
- code.AddLine("#pragma optionNV(fastmath off)");
-
- code.AddNewLine();
-
- code.AddLine(COMMON_DECLARATIONS);
- }
-
- void DeclareVertex() {
- if (stage != ShaderType::Vertex) {
- return;
- }
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareGeometry() {
- if (stage != ShaderType::Geometry) {
- return;
- }
-
- const auto& info = registry.GetGraphicsInfo();
- const auto input_topology = info.primitive_topology;
- const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
- max_input_vertices = max_vertices;
- code.AddLine("layout ({}) in;", glsl_topology);
-
- const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_output_vertices = header.common4.max_output_vertices.Value();
- code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
- code.AddNewLine();
-
- code.AddLine("in gl_PerVertex {{");
- ++code.scope;
- code.AddLine("vec4 gl_Position;");
- --code.scope;
- code.AddLine("}} gl_in[];");
-
- DeclareVertexRedeclarations();
- }
-
- void DeclareFragment() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("in gl_PerFragment {{");
- ++code.scope;
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_Color;");
- code.AddLine("vec4 gl_SecondaryColor;");
- --code.scope;
- code.AddLine("}};");
- }
-
- for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
- code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
- }
- }
-
- void DeclareCompute() {
- if (stage != ShaderType::Compute) {
- return;
- }
- const auto& info = registry.GetComputeInfo();
- if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
- const u32 limit = device.GetMaxComputeSharedMemorySize();
- if (size > limit) {
- LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
- size, limit);
- size = limit;
- }
-
- code.AddLine("shared uint smem[{}];", size / 4);
- code.AddNewLine();
- }
- code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
- info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
- code.AddNewLine();
- }
-
- void DeclareVertexRedeclarations() {
- code.AddLine("out gl_PerVertex {{");
- ++code.scope;
-
- auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
- if (!pos_xfb.empty()) {
- pos_xfb = fmt::format("layout ({}) ", pos_xfb);
- }
- const char* pos_type =
- FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
- code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
-
- for (const auto attribute : ir.GetOutputAttributes()) {
- if (attribute == Attribute::Index::ClipDistances0123 ||
- attribute == Attribute::Index::ClipDistances4567) {
- code.AddLine("float gl_ClipDistance[];");
- break;
- }
- }
-
- if (stage != ShaderType::Geometry &&
- (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
- if (ir.UsesLayer()) {
- code.AddLine("int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("int gl_ViewportIndex;");
- }
- } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
- !device.HasVertexViewportLayer()) {
- LOG_ERROR(
- Render_OpenGL,
- "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
- }
-
- if (ir.UsesPointSize()) {
- code.AddLine("float gl_PointSize;");
- }
-
- if (ir.UsesLegacyVaryings()) {
- code.AddLine("vec4 gl_TexCoord[8];");
- code.AddLine("vec4 gl_FrontColor;");
- code.AddLine("vec4 gl_FrontSecondaryColor;");
- code.AddLine("vec4 gl_BackColor;");
- code.AddLine("vec4 gl_BackSecondaryColor;");
- }
-
- --code.scope;
- code.AddLine("}};");
- code.AddNewLine();
-
- if (stage == ShaderType::Geometry) {
- if (ir.UsesLayer()) {
- code.AddLine("out int gl_Layer;");
- }
- if (ir.UsesViewportIndex()) {
- code.AddLine("out int gl_ViewportIndex;");
- }
- }
- code.AddNewLine();
- }
-
- void DeclareRegisters() {
- const auto& registers = ir.GetRegisters();
- for (const u32 gpr : registers) {
- code.AddLine("float {} = 0.0f;", GetRegister(gpr));
- }
- if (!registers.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareCustomVariables() {
- const u32 num_custom_variables = ir.GetNumCustomVariables();
- for (u32 i = 0; i < num_custom_variables; ++i) {
- code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
- }
- if (num_custom_variables > 0) {
- code.AddNewLine();
- }
- }
-
- void DeclarePredicates() {
- const auto& predicates = ir.GetPredicates();
- for (const auto pred : predicates) {
- code.AddLine("bool {} = false;", GetPredicate(pred));
- }
- if (!predicates.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareLocalMemory() {
- u64 local_memory_size = 0;
- if (stage == ShaderType::Compute) {
- local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
- } else {
- local_memory_size = header.GetLocalMemorySize();
- }
- if (local_memory_size == 0) {
- return;
- }
- const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
- code.AddNewLine();
- }
-
- void DeclareInternalFlags() {
- for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
- const auto flag_code = static_cast<InternalFlag>(flag);
- code.AddLine("bool {} = false;", GetInternalFlag(flag_code));
- }
- code.AddNewLine();
- }
-
- const char* GetInputFlags(PixelImap attribute) {
- switch (attribute) {
- case PixelImap::Perspective:
- return "smooth";
- case PixelImap::Constant:
- return "flat";
- case PixelImap::ScreenLinear:
- return "noperspective";
- case PixelImap::Unused:
- break;
- }
- UNIMPLEMENTED_MSG("Unknown attribute usage index={}", attribute);
- return {};
- }
-
- void DeclareInputAttributes() {
- if (ir.HasPhysicalAttributes()) {
- const u32 num_inputs{GetNumPhysicalInputAttributes()};
- for (u32 i = 0; i < num_inputs; ++i) {
- DeclareInputAttribute(ToGenericAttribute(i), true);
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetInputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareInputAttribute(index, false);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
- const u32 location{GetGenericAttributeIndex(index)};
-
- std::string name{GetGenericInputAttribute(index)};
- if (stage == ShaderType::Geometry) {
- name = "gs_" + name + "[]";
- }
-
- std::string suffix_;
- if (stage == ShaderType::Fragment) {
- const auto input_mode{header.ps.GetPixelImap(location)};
- if (input_mode == PixelImap::Unused) {
- return;
- }
- suffix_ = GetInputFlags(input_mode);
- }
-
- code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix_, name);
- }
-
- void DeclareOutputAttributes() {
- if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
- for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
- DeclareOutputAttribute(ToGenericAttribute(i));
- }
- code.AddNewLine();
- return;
- }
-
- const auto& attributes = ir.GetOutputAttributes();
- for (const auto index : attributes) {
- if (IsGenericAttribute(index)) {
- DeclareOutputAttribute(index);
- }
- }
- if (!attributes.empty()) {
- code.AddNewLine();
- }
- }
-
- std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return std::nullopt;
- }
- return it->second.components;
- }
-
- std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
- const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
- const auto it = transform_feedback.find(location);
- if (it == transform_feedback.end()) {
- return {};
- }
-
- const VaryingTFB& tfb = it->second;
- return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
- tfb.offset, tfb.stride);
- }
-
- void DeclareOutputAttribute(Attribute::Index index) {
- static constexpr std::string_view swizzle = "xyzw";
- u8 element = 0;
- while (element < 4) {
- auto xfb = GetTransformFeedbackDecoration(index, element);
- if (!xfb.empty()) {
- xfb = fmt::format(", {}", xfb);
- }
- const std::size_t remainder = 4 - element;
- const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
- const char* const type = FLOAT_TYPES.at(num_components - 1);
-
- const u32 location = GetGenericAttributeIndex(index);
-
- GenericVaryingDescription description;
- description.first_element = static_cast<u8>(element);
- description.is_scalar = num_components == 1;
- description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
- if (element != 0 || num_components != 4) {
- const std::string_view name_swizzle = swizzle.substr(element, num_components);
- description.name = fmt::format("{}_{}", description.name, name_swizzle);
- }
- for (std::size_t i = 0; i < num_components; ++i) {
- const u8 offset = static_cast<u8>(location * 4 + element + i);
- varying_description.insert({offset, description});
- }
-
- code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
- xfb, type, description.name);
-
- element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
- }
- }
-
- void DeclareConstantBuffers() {
- u32 binding = device.GetBaseBindings(stage).uniform_buffer;
- for (const auto& [index, info] : ir.GetConstantBuffers()) {
- const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32));
- const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
- code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
- GetConstBufferBlock(index));
- code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareGlobalMemory() {
- u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- // Since we don't know how the shader will use the shader, hint the driver to disable as
- // much optimizations as possible
- std::string qualifier = "coherent volatile";
- if (usage.is_read && !usage.is_written) {
- qualifier += " readonly";
- } else if (usage.is_written && !usage.is_read) {
- qualifier += " writeonly";
- }
-
- code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
- GetGlobalMemoryBlock(base));
- code.AddLine(" uint {}[];", GetGlobalMemory(base));
- code.AddLine("}};");
- code.AddNewLine();
- }
- }
-
- void DeclareSamplers() {
- u32 binding = device.GetBaseBindings(stage).sampler;
- for (const auto& sampler : ir.GetSamplers()) {
- const std::string name = GetSampler(sampler);
- const std::string description = fmt::format("layout (binding = {}) uniform", binding);
- binding += sampler.is_indexed ? sampler.size : 1;
-
- std::string sampler_type = [&]() {
- if (sampler.is_buffer) {
- return "samplerBuffer";
- }
- switch (sampler.type) {
- case TextureType::Texture1D:
- return "sampler1D";
- case TextureType::Texture2D:
- return "sampler2D";
- case TextureType::Texture3D:
- return "sampler3D";
- case TextureType::TextureCube:
- return "samplerCube";
- default:
- UNREACHABLE();
- return "sampler2D";
- }
- }();
- if (sampler.is_array) {
- sampler_type += "Array";
- }
- if (sampler.is_shadow) {
- sampler_type += "Shadow";
- }
-
- if (!sampler.is_indexed) {
- code.AddLine("{} {} {};", description, sampler_type, name);
- } else {
- code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.size);
- }
- }
- if (!ir.GetSamplers().empty()) {
- code.AddNewLine();
- }
- }
-
- void DeclarePhysicalAttributeReader() {
- if (!ir.HasPhysicalAttributes()) {
- return;
- }
- code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
- ++code.scope;
- code.AddLine("switch (physical_address) {{");
-
- // Just declare generic attributes for now.
- const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())};
- for (u32 index = 0; index < num_attributes; ++index) {
- const auto attribute{ToGenericAttribute(index)};
- for (u32 element = 0; element < 4; ++element) {
- constexpr u32 generic_base = 0x80;
- constexpr u32 generic_stride = 16;
- constexpr u32 element_stride = 4;
- const u32 address{generic_base + index * generic_stride + element * element_stride};
-
- const bool declared = stage != ShaderType::Fragment ||
- header.ps.GetPixelImap(index) != PixelImap::Unused;
- const std::string value =
- declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
- code.AddLine("case 0x{:X}U: return {};", address, value);
- }
- }
-
- code.AddLine("default: return 0;");
-
- code.AddLine("}}");
- --code.scope;
- code.AddLine("}}");
- code.AddNewLine();
- }
-
- void DeclareImages() {
- u32 binding = device.GetBaseBindings(stage).image;
- for (const auto& image : ir.GetImages()) {
- std::string qualifier = "coherent volatile";
- if (image.is_read && !image.is_written) {
- qualifier += " readonly";
- } else if (image.is_written && !image.is_read) {
- qualifier += " writeonly";
- }
-
- const char* format = image.is_atomic ? "r32ui, " : "";
- const char* type_declaration = GetImageTypeDeclaration(image.type);
- code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
- qualifier, type_declaration, GetImage(image));
- }
- if (!ir.GetImages().empty()) {
- code.AddNewLine();
- }
- }
-
- void VisitBlock(const NodeBlock& bb) {
- for (const auto& node : bb) {
- Visit(node).CheckVoid();
- }
- }
-
- Expression Visit(const Node& node) {
- if (const auto operation = std::get_if<OperationNode>(&*node)) {
- if (const auto amend_index = operation->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
- const auto operation_index = static_cast<std::size_t>(operation->GetCode());
- if (operation_index >= operation_decompilers.size()) {
- UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
- return {};
- }
- const auto decompiler = operation_decompilers[operation_index];
- if (decompiler == nullptr) {
- UNREACHABLE_MSG("Undefined operation: {}", operation_index);
- return {};
- }
- return (this->*decompiler)(*operation);
- }
-
- if (const auto gpr = std::get_if<GprNode>(&*node)) {
- const u32 index = gpr->GetIndex();
- if (index == Register::ZeroIndex) {
- return {"0U", Type::Uint};
- }
- return {GetRegister(index), Type::Float};
- }
-
- if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
- const u32 index = cv->GetIndex();
- return {GetCustomVariable(index), Type::Float};
- }
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
- const u32 value = immediate->GetValue();
- if (value < 10) {
- // For eyecandy avoid using hex numbers on single digits
- return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
- }
- return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
- }
-
- if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
- const auto value = [&]() -> std::string {
- switch (const auto index = predicate->GetIndex(); index) {
- case Tegra::Shader::Pred::UnusedIndex:
- return "true";
- case Tegra::Shader::Pred::NeverExecute:
- return "false";
- default:
- return GetPredicate(index);
- }
- }();
- if (predicate->IsNegated()) {
- return {fmt::format("!({})", value), Type::Bool};
- }
- return {value, Type::Bool};
- }
-
- if (const auto abuf = std::get_if<AbufNode>(&*node)) {
- UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
- "Physical attributes in geometry shaders are not implemented");
- if (abuf->IsPhysicalBuffer()) {
- return {fmt::format("ReadPhysicalAttribute({})",
- Visit(abuf->GetPhysicalAddress()).AsUint()),
- Type::Float};
- }
- return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
- }
-
- if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
- const Node offset = cbuf->GetOffset();
-
- if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
- // Direct access
- const u32 offset_imm = immediate->GetValue();
- ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4),
- Type::Uint};
- }
-
- // Indirect access
- const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
-
- if (!device.HasComponentIndexingBug()) {
- return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset),
- Type::Uint};
- }
-
- // AMD's proprietary GLSL compiler emits ill code for variable component access.
- // To bypass this driver bug generate 4 ifs, one per each component.
- const std::string pack = code.GenerateTemporary();
- code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
- final_offset);
-
- const std::string result = code.GenerateTemporary();
- code.AddLine("uint {};", result);
- for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
- code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
- GetSwizzle(swizzle));
- }
- return {result, Type::Uint};
- }
-
- if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- }
-
- if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- return {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- }
-
- if (const auto smem = std::get_if<SmemNode>(&*node)) {
- return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- }
-
- if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
- }
-
- if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
- if (const auto amend_index = conditional->GetAmendIndex()) {
- Visit(ir.GetAmendNode(*amend_index)).CheckVoid();
- }
- // It's invalid to call conditional on nested nodes, use an operation instead
- code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
- ++code.scope;
-
- VisitBlock(conditional->GetCode());
-
- --code.scope;
- code.AddLine("}}");
- return {};
- }
-
- if (const auto comment = std::get_if<CommentNode>(&*node)) {
- code.AddLine("// " + comment->GetText());
- return {};
- }
-
- UNREACHABLE();
- return {};
- }
-
- Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
- const auto GeometryPass = [&](std::string_view name) {
- if (stage == ShaderType::Geometry && buffer) {
- // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
- // set an 0x80000000 index for those and the shader fails to build. Find out why
- // this happens and what's its intent.
- return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
- max_input_vertices.value());
- }
- return std::string(name);
- };
-
- switch (attribute) {
- case Attribute::Index::Position:
- switch (stage) {
- case ShaderType::Geometry:
- return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
- GetSwizzle(element)),
- Type::Float};
- case ShaderType::Fragment:
- return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
- default:
- UNREACHABLE();
- return {"0", Type::Int};
- }
- case Attribute::Index::FrontColor:
- return {"gl_Color"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::FrontSecondaryColor:
- return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
- case Attribute::Index::PointCoord:
- switch (element) {
- case 0:
- return {"gl_PointCoord.x", Type::Float};
- case 1:
- return {"gl_PointCoord.y", Type::Float};
- case 2:
- case 3:
- return {"0.0f", Type::Float};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- case Attribute::Index::TessCoordInstanceIDVertexID:
- // TODO(Subv): Find out what the values are for the first two elements when inside a
- // vertex shader, and what's the value of the fourth element when inside a Tess Eval
- // shader.
- ASSERT(stage == ShaderType::Vertex);
- switch (element) {
- case 2:
- // Config pack's first value is instance_id.
- return {"gl_InstanceID", Type::Int};
- case 3:
- return {"gl_VertexID", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return {"0", Type::Int};
- case Attribute::Index::FrontFacing:
- // TODO(Subv): Find out what the values are for the other elements.
- ASSERT(stage == ShaderType::Fragment);
- switch (element) {
- case 3:
- return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
- }
- UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return {"0", Type::Int};
- default:
- if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
- Type::Float};
- }
- if (IsLegacyTexCoord(attribute)) {
- UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
- return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float};
- }
- break;
- }
- UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
- return {"0", Type::Int};
- }
-
- Expression ApplyPrecise(Operation operation, std::string value, Type type) {
- if (!IsPrecise(operation)) {
- return {std::move(value), type};
- }
- // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
- // be found in fragment shaders, so we disable precise there. There are vertex shaders that
- // also fail to build but nobody seems to care about those.
- // Note: Only bugged drivers will skip precise.
- const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
-
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
- temporary, value);
- return {std::move(temporary), type};
- }
-
- Expression VisitOperand(Operation operation, std::size_t operand_index) {
- const auto& operand = operation[operand_index];
- const bool parent_precise = IsPrecise(operation);
- const bool child_precise = IsPrecise(operand);
- const bool child_trivial = !std::holds_alternative<OperationNode>(*operand);
- if (!parent_precise || child_precise || child_trivial) {
- return Visit(operand);
- }
-
- Expression value = Visit(operand);
- std::string temporary = code.GenerateTemporary();
- code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
- return {std::move(temporary), value.GetType()};
- }
-
- std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
- const u32 element = abuf->GetElement();
- switch (const auto attribute = abuf->GetIndex()) {
- case Attribute::Index::Position:
- return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::LayerViewportPointSize:
- switch (element) {
- case 0:
- UNIMPLEMENTED();
- return std::nullopt;
- case 1:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_Layer", Type::Int}};
- case 2:
- if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return std::nullopt;
- }
- return {{"gl_ViewportIndex", Type::Int}};
- case 3:
- return {{"gl_PointSize", Type::Float}};
- }
- return std::nullopt;
- case Attribute::Index::FrontColor:
- return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::FrontSecondaryColor:
- return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackColor:
- return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::BackSecondaryColor:
- return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
- case Attribute::Index::ClipDistances0123:
- return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
- case Attribute::Index::ClipDistances4567:
- return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
- default:
- if (IsGenericAttribute(attribute)) {
- return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
- }
- if (IsLegacyTexCoord(attribute)) {
- return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
- GetSwizzle(element)),
- Type::Float}};
- }
- UNIMPLEMENTED_MSG("Unhandled output attribute: {}", attribute);
- return std::nullopt;
- }
- }
-
- Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
- Type type_a) {
- std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
- Type type_a, Type type_b, Type type_c) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c, Type type_d) {
- const std::string op_a = VisitOperand(operation, 0).As(type_a);
- const std::string op_b = VisitOperand(operation, 1).As(type_b);
- const std::string op_c = VisitOperand(operation, 2).As(type_c);
- const std::string op_d = VisitOperand(operation, 3).As(type_d);
- std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
-
- return ApplyPrecise(operation, std::move(op_str), result_type);
- }
-
- std::string GenerateTexture(Operation operation, const std::string& function_suffix,
- const std::vector<TextureIR>& extras, bool separate_dc = false) {
- constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
-
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::size_t count = operation.GetOperandsCount();
- const bool has_array = meta->sampler.is_array;
- const bool has_shadow = meta->sampler.is_shadow;
- const bool workaround_lod_array_shadow_as_grad =
- !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube);
-
- std::string expr = "texture";
-
- if (workaround_lod_array_shadow_as_grad) {
- expr += "Grad";
- } else {
- expr += function_suffix;
- }
-
- if (!meta->aoffi.empty()) {
- expr += "Offset";
- } else if (!meta->ptp.empty()) {
- expr += "Offsets";
- }
- if (!meta->sampler.is_indexed) {
- expr += '(' + GetSampler(meta->sampler) + ", ";
- } else {
- expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
- }
- expr += coord_constructors.at(count + (has_array ? 1 : 0) +
- (has_shadow && !separate_dc ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- expr += Visit(operation[i]).AsFloat();
-
- const std::size_t next = i + 1;
- if (next < count)
- expr += ", ";
- }
- if (has_array) {
- expr += ", float(" + Visit(meta->array).AsInt() + ')';
- }
- if (has_shadow) {
- if (separate_dc) {
- expr += "), " + Visit(meta->depth_compare).AsFloat();
- } else {
- expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
- }
- } else {
- expr += ')';
- }
-
- if (workaround_lod_array_shadow_as_grad) {
- switch (meta->sampler.type) {
- case TextureType::Texture2D:
- return expr + ", vec2(0.0), vec2(0.0))";
- case TextureType::TextureCube:
- return expr + ", vec3(0.0), vec3(0.0))";
- default:
- UNREACHABLE();
- break;
- }
- }
-
- for (const auto& variant : extras) {
- if (const auto argument = std::get_if<TextureArgument>(&variant)) {
- expr += GenerateTextureArgument(*argument);
- } else if (std::holds_alternative<TextureOffset>(variant)) {
- if (!meta->aoffi.empty()) {
- expr += GenerateTextureAoffi(meta->aoffi);
- } else if (!meta->ptp.empty()) {
- expr += GenerateTexturePtp(meta->ptp);
- }
- } else if (std::holds_alternative<TextureDerivates>(variant)) {
- expr += GenerateTextureDerivates(meta->derivates);
- } else {
- UNREACHABLE();
- }
- }
-
- return expr + ')';
- }
-
- std::string GenerateTextureArgument(const TextureArgument& argument) {
- const auto& [type, operand] = argument;
- if (operand == nullptr) {
- return {};
- }
-
- std::string expr = ", ";
- switch (type) {
- case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
- // Inline the string as an immediate integer in GLSL (some extra arguments are
- // required to be constant)
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else {
- expr += Visit(operand).AsInt();
- }
- break;
- case Type::Float:
- expr += Visit(operand).AsFloat();
- break;
- default: {
- const auto type_int = static_cast<u32>(type);
- UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
- expr += '0';
- break;
- }
- }
- return expr;
- }
-
- std::string ReadTextureOffset(const Node& value) {
- if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
- // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
- // to be constant by the standard).
- return std::to_string(static_cast<s32>(immediate->GetValue()));
- } else if (device.HasVariableAoffi()) {
- // Avoid using variable AOFFI on unsupported devices.
- return Visit(value).AsInt();
- } else {
- // Insert 0 on devices not supporting variable AOFFI.
- return "0";
- }
- }
-
- std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
- if (aoffi.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
- std::string expr = ", ";
- expr += coord_constructors.at(aoffi.size() - 1);
- expr += '(';
-
- for (std::size_t index = 0; index < aoffi.size(); ++index) {
- expr += ReadTextureOffset(aoffi.at(index));
- if (index + 1 < aoffi.size()) {
- expr += ", ";
- }
- }
- expr += ')';
-
- return expr;
- }
-
- std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
- static constexpr std::size_t num_vectors = 4;
- ASSERT(ptp.size() == num_vectors * 2);
-
- std::string expr = ", ivec2[](";
- for (std::size_t vector = 0; vector < num_vectors; ++vector) {
- const bool has_next = vector + 1 < num_vectors;
- expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
- ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
- }
- expr += ')';
- return expr;
- }
-
- std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
- if (derivates.empty()) {
- return {};
- }
- constexpr std::array coord_constructors = {"float", "vec2", "vec3"};
- std::string expr = ", ";
- const std::size_t components = derivates.size() / 2;
- std::string dx = coord_constructors.at(components - 1);
- std::string dy = coord_constructors.at(components - 1);
- dx += '(';
- dy += '(';
-
- for (std::size_t index = 0; index < components; ++index) {
- const auto& operand_x{derivates.at(index * 2)};
- const auto& operand_y{derivates.at(index * 2 + 1)};
- dx += Visit(operand_x).AsFloat();
- dy += Visit(operand_y).AsFloat();
-
- if (index + 1 < components) {
- dx += ", ";
- dy += ", ";
- }
- }
- dx += ')';
- dy += ')';
- expr += dx + ", " + dy;
-
- return expr;
- }
-
- std::string BuildIntegerCoordinates(Operation operation) {
- constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
- const std::size_t coords_count{operation.GetOperandsCount()};
- std::string expr = constructors.at(coords_count - 1);
- for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- if (i + 1 < coords_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- std::string BuildImageValues(Operation operation) {
- constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
-
- const std::size_t values_count{meta.values.size()};
- std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
- for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i)).AsUint();
- if (i + 1 < values_count) {
- expr += ", ";
- }
- }
- expr += ')';
- return expr;
- }
-
- Expression Assign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- Expression target;
- if (const auto gpr = std::get_if<GprNode>(&*dest)) {
- if (gpr->GetIndex() == Register::ZeroIndex) {
- // Writing to Register::ZeroIndex is a no op but we still have to visit the source
- // as it might have side effects.
- code.AddLine("{};", Visit(src).GetCode());
- return {};
- }
- target = {GetRegister(gpr->GetIndex()), Type::Float};
- } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
- UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- auto output = GetOutputAttribute(abuf);
- if (!output) {
- return {};
- }
- target = std::move(*output);
- } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- target = {
- fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
- Type::Uint};
- } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
- ASSERT(stage == ShaderType::Compute);
- target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
- } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string real = Visit(gmem->GetRealAddress()).AsUint();
- const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
- const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
- target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
- Type::Uint};
- } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
- target = {GetCustomVariable(cv->GetIndex()), Type::Float};
- } else {
- UNREACHABLE_MSG("Assign called without a proper target");
- }
-
- code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
- return {};
- }
-
- template <Type type>
- Expression Add(Operation operation) {
- return GenerateBinaryInfix(operation, "+", type, type, type);
- }
-
- template <Type type>
- Expression Mul(Operation operation) {
- return GenerateBinaryInfix(operation, "*", type, type, type);
- }
-
- template <Type type>
- Expression Div(Operation operation) {
- return GenerateBinaryInfix(operation, "/", type, type, type);
- }
-
- template <Type type>
- Expression Fma(Operation operation) {
- return GenerateTernary(operation, "fma", type, type, type, type);
- }
-
- template <Type type>
- Expression Negate(Operation operation) {
- return GenerateUnary(operation, "-", type, type);
- }
-
- template <Type type>
- Expression Absolute(Operation operation) {
- return GenerateUnary(operation, "abs", type, type);
- }
-
- Expression FClamp(Operation operation) {
- return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
- Type::Float);
- }
-
- Expression FCastHalf0(Operation operation) {
- return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression FCastHalf1(Operation operation) {
- return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- template <Type type>
- Expression Min(Operation operation) {
- return GenerateBinaryCall(operation, "min", type, type, type);
- }
-
- template <Type type>
- Expression Max(Operation operation) {
- return GenerateBinaryCall(operation, "max", type, type, type);
- }
-
- Expression Select(Operation operation) {
- const std::string condition = Visit(operation[0]).AsBool();
- const std::string true_case = Visit(operation[1]).AsUint();
- const std::string false_case = Visit(operation[2]).AsUint();
- std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Uint);
- }
-
- Expression FCos(Operation operation) {
- return GenerateUnary(operation, "cos", Type::Float, Type::Float);
- }
-
- Expression FSin(Operation operation) {
- return GenerateUnary(operation, "sin", Type::Float, Type::Float);
- }
-
- Expression FExp2(Operation operation) {
- return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
- }
-
- Expression FLog2(Operation operation) {
- return GenerateUnary(operation, "log2", Type::Float, Type::Float);
- }
-
- Expression FInverseSqrt(Operation operation) {
- return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
- }
-
- Expression FSqrt(Operation operation) {
- return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
- }
-
- Expression FRoundEven(Operation operation) {
- return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
- }
-
- Expression FFloor(Operation operation) {
- return GenerateUnary(operation, "floor", Type::Float, Type::Float);
- }
-
- Expression FCeil(Operation operation) {
- return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
- }
-
- Expression FTrunc(Operation operation) {
- return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
- }
-
- template <Type type>
- Expression FCastInteger(Operation operation) {
- return GenerateUnary(operation, "float", Type::Float, type);
- }
-
- Expression FSwizzleAdd(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsFloat();
- const std::string op_b = VisitOperand(operation, 1).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {fmt::format("{} + {}", op_a, op_b), Type::Float};
- }
-
- const std::string instr_mask = VisitOperand(operation, 2).AsUint();
- const std::string mask = code.GenerateTemporary();
- code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
- instr_mask);
-
- const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
- const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
- return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
- Type::Float};
- }
-
- Expression ICastFloat(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Float);
- }
-
- Expression ICastUnsigned(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression LogicalShiftLeft(Operation operation) {
- return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
- }
-
- Expression ILogicalShiftRight(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
- const std::string op_b = VisitOperand(operation, 1).AsUint();
- std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
-
- return ApplyPrecise(operation, std::move(op_str), Type::Int);
- }
-
- Expression IArithmeticShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
- }
-
- template <Type type>
- Expression BitwiseAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseOr(Operation operation) {
- return GenerateBinaryInfix(operation, "|", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^", type, type, type);
- }
-
- template <Type type>
- Expression BitwiseNot(Operation operation) {
- return GenerateUnary(operation, "~", type, type);
- }
-
- Expression UCastFloat(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
- }
-
- Expression UCastSigned(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
- }
-
- Expression UShiftRight(Operation operation) {
- return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
- }
-
- template <Type type>
- Expression BitfieldInsert(Operation operation) {
- return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
- Type::Int);
- }
-
- template <Type type>
- Expression BitfieldExtract(Operation operation) {
- return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
- }
-
- template <Type type>
- Expression BitCount(Operation operation) {
- return GenerateUnary(operation, "bitCount", type, type);
- }
-
- template <Type type>
- Expression BitMSB(Operation operation) {
- return GenerateUnary(operation, "findMSB", type, type);
- }
-
- Expression HNegate(Operation operation) {
- const auto GetNegate = [&](std::size_t index) {
- return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
- };
- return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
- GetNegate(1), GetNegate(2)),
- Type::HalfFloat};
- }
-
- Expression HClamp(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsHalfFloat();
- const std::string min = VisitOperand(operation, 1).AsFloat();
- const std::string max = VisitOperand(operation, 2).AsFloat();
- std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
-
- return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
- }
-
- Expression HCastFloat(Operation operation) {
- return {fmt::format("vec2({}, 0.0f)", VisitOperand(operation, 0).AsFloat()),
- Type::HalfFloat};
- }
-
- Expression HUnpack(Operation operation) {
- Expression operand = VisitOperand(operation, 0);
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32:
- return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H0_H0:
- return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
- case Tegra::Shader::HalfType::H1_H1:
- return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression HMergeF32(Operation operation) {
- return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
- }
-
- Expression HMergeH0(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
- Type::HalfFloat};
- }
-
- Expression HMergeH1(Operation operation) {
- const std::string dest = VisitOperand(operation, 0).AsUint();
- const std::string src = VisitOperand(operation, 1).AsUint();
- return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
- Type::HalfFloat};
- }
-
- Expression HPack2(Operation operation) {
- return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::HalfFloat};
- }
-
- template <const std::string_view& op, Type type, bool unordered = false>
- Expression Comparison(Operation operation) {
- static_assert(!unordered || type == Type::Float);
-
- Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
-
- if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
- // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
- // and Nvidia's proprietary stacks. Manually force an ordered comparison.
- return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
- if constexpr (!unordered) {
- return expr;
- }
- // Unordered comparisons are always true for NaN operands.
- return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
- VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression FOrdered(Operation operation) {
- return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression FUnordered(Operation operation) {
- return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
- VisitOperand(operation, 1).AsFloat()),
- Type::Bool};
- }
-
- Expression LogicalAddCarry(Operation operation) {
- const std::string carry = code.GenerateTemporary();
- code.AddLine("uint {};", carry);
- code.AddLine("uaddCarry({}, {}, {});", VisitOperand(operation, 0).AsUint(),
- VisitOperand(operation, 1).AsUint(), carry);
- return {fmt::format("({} != 0)", carry), Type::Bool};
- }
-
- Expression LogicalAssign(Operation operation) {
- const Node& dest = operation[0];
- const Node& src = operation[1];
-
- std::string target;
-
- if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
- ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
-
- const auto index = pred->GetIndex();
- switch (index) {
- case Tegra::Shader::Pred::NeverExecute:
- case Tegra::Shader::Pred::UnusedIndex:
- // Writing to these predicates is a no-op
- return {};
- }
- target = GetPredicate(index);
- } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
- target = GetInternalFlag(flag->GetFlag());
- }
-
- code.AddLine("{} = {};", target, Visit(src).AsBool());
- return {};
- }
-
- Expression LogicalAnd(Operation operation) {
- return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalOr(Operation operation) {
- return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalXor(Operation operation) {
- return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
- }
-
- Expression LogicalNegate(Operation operation) {
- return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
- }
-
- Expression LogicalPick2(Operation operation) {
- return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
- VisitOperand(operation, 1).AsUint()),
- Type::Bool};
- }
-
- Expression LogicalAnd2(Operation operation) {
- return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
- }
-
- template <bool with_nan>
- Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
- Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat);
- if constexpr (!with_nan) {
- return comparison;
- }
- return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
- VisitOperand(operation, 0).AsHalfFloat(),
- VisitOperand(operation, 1).AsHalfFloat()),
- Type::Bool2};
- }
-
- template <bool with_nan>
- Expression Logical2HLessThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThan");
- }
-
- template <bool with_nan>
- Expression Logical2HEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "equal");
- }
-
- template <bool with_nan>
- Expression Logical2HLessEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterThan(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThan");
- }
-
- template <bool with_nan>
- Expression Logical2HNotEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "notEqual");
- }
-
- template <bool with_nan>
- Expression Logical2HGreaterEqual(Operation operation) {
- return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
- }
-
- Expression Texture(Operation operation) {
- const auto meta = std::get<MetaTexture>(operation.GetMeta());
- const bool separate_dc = meta.sampler.type == TextureType::TextureCube &&
- meta.sampler.is_array && meta.sampler.is_shadow;
- // TODO: Replace this with an array and make GenerateTexture use C++20 std::span
- const std::vector<TextureIR> extras{
- TextureOffset{},
- TextureArgument{Type::Float, meta.bias},
- };
- std::string expr = GenerateTexture(operation, "", extras, separate_dc);
- if (meta.sampler.is_shadow) {
- expr = fmt::format("vec4({})", expr);
- }
- return {expr + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression TextureLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- std::string expr{};
-
- if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
- ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
- meta->sampler.type == TextureType::TextureCube)) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
- expr = GenerateTexture(operation, "Lod", {});
- } else {
- expr = GenerateTexture(operation, "Lod",
- {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
- }
-
- if (meta->sampler.is_shadow) {
- expr = "vec4(" + expr + ')';
- }
- return {expr + GetSwizzle(meta->element), Type::Float};
- }
-
- Expression TextureGather(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
-
- const auto type = meta.sampler.is_shadow ? Type::Float : Type::Int;
- const bool separate_dc = meta.sampler.is_shadow;
-
- std::vector<TextureIR> ir_;
- if (meta.sampler.is_shadow) {
- ir_ = {TextureOffset{}};
- } else {
- ir_ = {TextureOffset{}, TextureArgument{type, meta.component}};
- }
- return {GenerateTexture(operation, "Gather", ir_, separate_dc) + GetSwizzle(meta.element),
- Type::Float};
- }
-
- Expression TextureQueryDimensions(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- const std::string sampler = GetSampler(meta->sampler);
- const std::string lod = VisitOperand(operation, 0).AsInt();
-
- switch (meta->element) {
- case 0:
- case 1:
- return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
- Type::Int};
- case 3:
- return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
- }
- UNREACHABLE();
- return {"0", Type::Int};
- }
-
- Expression TextureQueryLod(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
-
- if (meta->element < 2) {
- return {fmt::format("int(({} * vec2(256)){})",
- GenerateTexture(operation, "QueryLod", {}),
- GetSwizzle(meta->element)),
- Type::Int};
- }
- return {"0", Type::Int};
- }
-
- Expression TexelFetch(Operation operation) {
- constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
- UNIMPLEMENTED_IF(meta->sampler.is_array);
- const std::size_t count = operation.GetOperandsCount();
-
- std::string expr = "texelFetch(";
- expr += GetSampler(meta->sampler);
- expr += ", ";
-
- expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
- expr += '(';
- for (std::size_t i = 0; i < count; ++i) {
- if (i > 0) {
- expr += ", ";
- }
- expr += VisitOperand(operation, i).AsInt();
- }
- if (meta->array) {
- expr += ", ";
- expr += Visit(meta->array).AsInt();
- }
- expr += ')';
-
- if (meta->lod && !meta->sampler.is_buffer) {
- expr += ", ";
- expr += Visit(meta->lod).AsInt();
- }
- expr += ')';
- expr += GetSwizzle(meta->element);
-
- return {std::move(expr), Type::Float};
- }
-
- Expression TextureGradient(Operation operation) {
- const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- std::string expr =
- GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
- return {std::move(expr) + GetSwizzle(meta.element), Type::Float};
- }
-
- Expression ImageLoad(Operation operation) {
- if (!device.HasImageLoadFormatted()) {
- LOG_ERROR(Render_OpenGL,
- "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
- return {"0", Type::Int};
- }
-
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
- BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
- Type::Uint};
- }
-
- Expression ImageStore(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
- BuildIntegerCoordinates(operation), BuildImageValues(operation));
- return {};
- }
-
- template <const std::string_view& opname>
- Expression AtomicImage(Operation operation) {
- const auto& meta{std::get<MetaImage>(operation.GetMeta())};
- ASSERT(meta.values.size() == 1);
-
- return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
- BuildIntegerCoordinates(operation), Visit(meta.values[0]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Atomic(Operation operation) {
- if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) {
- UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations");
- return {};
- }
- return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
- Visit(operation[1]).AsUint()),
- Type::Uint};
- }
-
- template <const std::string_view& opname, Type type>
- Expression Reduce(Operation operation) {
- code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
- return {};
- }
-
- Expression Branch(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
- code.AddLine("break;");
- return {};
- }
-
- Expression BranchIndirect(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0).AsUint();
-
- code.AddLine("jmp_to = {};", op_a);
- code.AddLine("break;");
- return {};
- }
-
- Expression PushFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- const auto target = std::get_if<ImmediateNode>(&*operation[0]);
- UNIMPLEMENTED_IF(!target);
-
- code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
- target->GetValue());
- return {};
- }
-
- Expression PopFlowStack(Operation operation) {
- const auto stack = std::get<MetaStackClass>(operation.GetMeta());
- code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
- code.AddLine("break;");
- return {};
- }
-
- void PreExit() {
- if (stage != ShaderType::Fragment) {
- return;
- }
- const auto& used_registers = ir.GetRegisters();
- const auto SafeGetRegister = [&](u32 reg) -> Expression {
- // TODO(Rodrigo): Replace with contains once C++20 releases
- if (used_registers.find(reg) != used_registers.end()) {
- return {GetRegister(reg), Type::Float};
- }
- return {"0.0f", Type::Float};
- };
-
- UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
-
- // Write the color outputs using the data in the shader registers, disabled
- // rendertargets/components are skipped in the register assignment.
- u32 current_reg = 0;
- for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
- // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
- for (u32 component = 0; component < 4; ++component) {
- if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
- SafeGetRegister(current_reg).AsFloat());
- ++current_reg;
- }
- }
- }
- if (header.ps.omap.depth) {
- // The depth output is always 2 registers after the last color output, and current_reg
- // already contains one past the last color register.
- code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
- }
- }
-
- Expression Exit(Operation operation) {
- PreExit();
- code.AddLine("return;");
- return {};
- }
-
- Expression Discard(Operation operation) {
- // Enclose "discard" in a conditional, so that GLSL compilation does not complain
- // about unexecuted instructions that may follow this.
- code.AddLine("if (true) {{");
- ++code.scope;
- code.AddLine("discard;");
- --code.scope;
- code.AddLine("}}");
- return {};
- }
-
- Expression EmitVertex(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EmitVertex is expected to be used in a geometry shader.");
- code.AddLine("EmitVertex();");
- return {};
- }
-
- Expression EndPrimitive(Operation operation) {
- ASSERT_MSG(stage == ShaderType::Geometry,
- "EndPrimitive is expected to be used in a geometry shader.");
- code.AddLine("EndPrimitive();");
- return {};
- }
-
- Expression InvocationId(Operation operation) {
- return {"gl_InvocationID", Type::Int};
- }
-
- Expression YNegate(Operation operation) {
- // Y_NEGATE is mapped to this uniform value
- return {"gl_FrontMaterial.ambient.a", Type::Float};
- }
-
- template <u32 element>
- Expression LocalInvocationId(Operation) {
- return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
- }
-
- template <u32 element>
- Expression WorkGroupId(Operation) {
- return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
- }
-
- Expression BallotThread(Operation operation) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub on non-Nvidia devices by simulating all threads voting the same as the active
- // one.
- return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
- }
- return {fmt::format("ballotThreadNV({})", value), Type::Uint};
- }
-
- Expression Vote(Operation operation, const char* func) {
- const std::string value = VisitOperand(operation, 0).AsBool();
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // Stub with a warp size of one.
- return {value, Type::Bool};
- }
- return {fmt::format("{}({})", func, value), Type::Bool};
- }
-
- Expression VoteAll(Operation operation) {
- return Vote(operation, "allThreadsNV");
- }
-
- Expression VoteAny(Operation operation) {
- return Vote(operation, "anyThreadNV");
- }
-
- Expression VoteEqual(Operation operation) {
- if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
- // We must return true here since a stub for a theoretical warp size of 1.
- // This will always return an equal result across all votes.
- return {"true", Type::Bool};
- }
- return Vote(operation, "allThreadsEqualNV");
- }
-
- Expression ThreadId(Operation operation) {
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {"0U", Type::Uint};
- }
- return {"gl_SubGroupInvocationARB", Type::Uint};
- }
-
- template <const std::string_view& comparison>
- Expression ThreadMask(Operation) {
- if (device.HasWarpIntrinsics()) {
- return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
- }
- if (device.HasShaderBallot()) {
- return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
- }
- LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
- return {"0U", Type::Uint};
- }
-
- Expression ShuffleIndexed(Operation operation) {
- std::string value = VisitOperand(operation, 0).AsFloat();
-
- if (!device.HasShaderBallot()) {
- LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
- return {std::move(value), Type::Float};
- }
-
- const std::string index = VisitOperand(operation, 1).AsUint();
- return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
- }
-
- Expression Barrier(Operation) {
- if (!ir.IsDecompiled()) {
- LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
- return {};
- }
- code.AddLine("barrier();");
- return {};
- }
-
- Expression MemoryBarrierGroup(Operation) {
- code.AddLine("groupMemoryBarrier();");
- return {};
- }
-
- Expression MemoryBarrierGlobal(Operation) {
- code.AddLine("memoryBarrier();");
- return {};
- }
-
- struct Func final {
- Func() = delete;
- ~Func() = delete;
-
- static constexpr std::string_view LessThan = "<";
- static constexpr std::string_view Equal = "==";
- static constexpr std::string_view LessEqual = "<=";
- static constexpr std::string_view GreaterThan = ">";
- static constexpr std::string_view NotEqual = "!=";
- static constexpr std::string_view GreaterEqual = ">=";
-
- static constexpr std::string_view Eq = "Eq";
- static constexpr std::string_view Ge = "Ge";
- static constexpr std::string_view Gt = "Gt";
- static constexpr std::string_view Le = "Le";
- static constexpr std::string_view Lt = "Lt";
-
- static constexpr std::string_view Add = "Add";
- static constexpr std::string_view Min = "Min";
- static constexpr std::string_view Max = "Max";
- static constexpr std::string_view And = "And";
- static constexpr std::string_view Or = "Or";
- static constexpr std::string_view Xor = "Xor";
- static constexpr std::string_view Exchange = "Exchange";
- };
-
- static constexpr std::array operation_decompilers = {
- &GLSLDecompiler::Assign,
-
- &GLSLDecompiler::Select,
-
- &GLSLDecompiler::Add<Type::Float>,
- &GLSLDecompiler::Mul<Type::Float>,
- &GLSLDecompiler::Div<Type::Float>,
- &GLSLDecompiler::Fma<Type::Float>,
- &GLSLDecompiler::Negate<Type::Float>,
- &GLSLDecompiler::Absolute<Type::Float>,
- &GLSLDecompiler::FClamp,
- &GLSLDecompiler::FCastHalf0,
- &GLSLDecompiler::FCastHalf1,
- &GLSLDecompiler::Min<Type::Float>,
- &GLSLDecompiler::Max<Type::Float>,
- &GLSLDecompiler::FCos,
- &GLSLDecompiler::FSin,
- &GLSLDecompiler::FExp2,
- &GLSLDecompiler::FLog2,
- &GLSLDecompiler::FInverseSqrt,
- &GLSLDecompiler::FSqrt,
- &GLSLDecompiler::FRoundEven,
- &GLSLDecompiler::FFloor,
- &GLSLDecompiler::FCeil,
- &GLSLDecompiler::FTrunc,
- &GLSLDecompiler::FCastInteger<Type::Int>,
- &GLSLDecompiler::FCastInteger<Type::Uint>,
- &GLSLDecompiler::FSwizzleAdd,
-
- &GLSLDecompiler::Add<Type::Int>,
- &GLSLDecompiler::Mul<Type::Int>,
- &GLSLDecompiler::Div<Type::Int>,
- &GLSLDecompiler::Negate<Type::Int>,
- &GLSLDecompiler::Absolute<Type::Int>,
- &GLSLDecompiler::Min<Type::Int>,
- &GLSLDecompiler::Max<Type::Int>,
-
- &GLSLDecompiler::ICastFloat,
- &GLSLDecompiler::ICastUnsigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Int>,
- &GLSLDecompiler::ILogicalShiftRight,
- &GLSLDecompiler::IArithmeticShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Int>,
- &GLSLDecompiler::BitwiseOr<Type::Int>,
- &GLSLDecompiler::BitwiseXor<Type::Int>,
- &GLSLDecompiler::BitwiseNot<Type::Int>,
- &GLSLDecompiler::BitfieldInsert<Type::Int>,
- &GLSLDecompiler::BitfieldExtract<Type::Int>,
- &GLSLDecompiler::BitCount<Type::Int>,
- &GLSLDecompiler::BitMSB<Type::Int>,
-
- &GLSLDecompiler::Add<Type::Uint>,
- &GLSLDecompiler::Mul<Type::Uint>,
- &GLSLDecompiler::Div<Type::Uint>,
- &GLSLDecompiler::Min<Type::Uint>,
- &GLSLDecompiler::Max<Type::Uint>,
- &GLSLDecompiler::UCastFloat,
- &GLSLDecompiler::UCastSigned,
- &GLSLDecompiler::LogicalShiftLeft<Type::Uint>,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::UShiftRight,
- &GLSLDecompiler::BitwiseAnd<Type::Uint>,
- &GLSLDecompiler::BitwiseOr<Type::Uint>,
- &GLSLDecompiler::BitwiseXor<Type::Uint>,
- &GLSLDecompiler::BitwiseNot<Type::Uint>,
- &GLSLDecompiler::BitfieldInsert<Type::Uint>,
- &GLSLDecompiler::BitfieldExtract<Type::Uint>,
- &GLSLDecompiler::BitCount<Type::Uint>,
- &GLSLDecompiler::BitMSB<Type::Uint>,
-
- &GLSLDecompiler::Add<Type::HalfFloat>,
- &GLSLDecompiler::Mul<Type::HalfFloat>,
- &GLSLDecompiler::Fma<Type::HalfFloat>,
- &GLSLDecompiler::Absolute<Type::HalfFloat>,
- &GLSLDecompiler::HNegate,
- &GLSLDecompiler::HClamp,
- &GLSLDecompiler::HCastFloat,
- &GLSLDecompiler::HUnpack,
- &GLSLDecompiler::HMergeF32,
- &GLSLDecompiler::HMergeH0,
- &GLSLDecompiler::HMergeH1,
- &GLSLDecompiler::HPack2,
-
- &GLSLDecompiler::LogicalAssign,
- &GLSLDecompiler::LogicalAnd,
- &GLSLDecompiler::LogicalOr,
- &GLSLDecompiler::LogicalXor,
- &GLSLDecompiler::LogicalNegate,
- &GLSLDecompiler::LogicalPick2,
- &GLSLDecompiler::LogicalAnd2,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
- &GLSLDecompiler::FOrdered,
- &GLSLDecompiler::FUnordered,
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
-
- &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
- &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
-
- &GLSLDecompiler::LogicalAddCarry,
-
- &GLSLDecompiler::Logical2HLessThan<false>,
- &GLSLDecompiler::Logical2HEqual<false>,
- &GLSLDecompiler::Logical2HLessEqual<false>,
- &GLSLDecompiler::Logical2HGreaterThan<false>,
- &GLSLDecompiler::Logical2HNotEqual<false>,
- &GLSLDecompiler::Logical2HGreaterEqual<false>,
- &GLSLDecompiler::Logical2HLessThan<true>,
- &GLSLDecompiler::Logical2HEqual<true>,
- &GLSLDecompiler::Logical2HLessEqual<true>,
- &GLSLDecompiler::Logical2HGreaterThan<true>,
- &GLSLDecompiler::Logical2HNotEqual<true>,
- &GLSLDecompiler::Logical2HGreaterEqual<true>,
-
- &GLSLDecompiler::Texture,
- &GLSLDecompiler::TextureLod,
- &GLSLDecompiler::TextureGather,
- &GLSLDecompiler::TextureQueryDimensions,
- &GLSLDecompiler::TextureQueryLod,
- &GLSLDecompiler::TexelFetch,
- &GLSLDecompiler::TextureGradient,
-
- &GLSLDecompiler::ImageLoad,
- &GLSLDecompiler::ImageStore,
-
- &GLSLDecompiler::AtomicImage<Func::Add>,
- &GLSLDecompiler::AtomicImage<Func::And>,
- &GLSLDecompiler::AtomicImage<Func::Or>,
- &GLSLDecompiler::AtomicImage<Func::Xor>,
- &GLSLDecompiler::AtomicImage<Func::Exchange>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::And, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Uint>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Add, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Min, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Max, Type::Int>,
- &GLSLDecompiler::Atomic<Func::And, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
- &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::And, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
-
- &GLSLDecompiler::Reduce<Func::Add, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Min, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Max, Type::Int>,
- &GLSLDecompiler::Reduce<Func::And, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Or, Type::Int>,
- &GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
-
- &GLSLDecompiler::Branch,
- &GLSLDecompiler::BranchIndirect,
- &GLSLDecompiler::PushFlowStack,
- &GLSLDecompiler::PopFlowStack,
- &GLSLDecompiler::Exit,
- &GLSLDecompiler::Discard,
-
- &GLSLDecompiler::EmitVertex,
- &GLSLDecompiler::EndPrimitive,
-
- &GLSLDecompiler::InvocationId,
- &GLSLDecompiler::YNegate,
- &GLSLDecompiler::LocalInvocationId<0>,
- &GLSLDecompiler::LocalInvocationId<1>,
- &GLSLDecompiler::LocalInvocationId<2>,
- &GLSLDecompiler::WorkGroupId<0>,
- &GLSLDecompiler::WorkGroupId<1>,
- &GLSLDecompiler::WorkGroupId<2>,
-
- &GLSLDecompiler::BallotThread,
- &GLSLDecompiler::VoteAll,
- &GLSLDecompiler::VoteAny,
- &GLSLDecompiler::VoteEqual,
-
- &GLSLDecompiler::ThreadId,
- &GLSLDecompiler::ThreadMask<Func::Eq>,
- &GLSLDecompiler::ThreadMask<Func::Ge>,
- &GLSLDecompiler::ThreadMask<Func::Gt>,
- &GLSLDecompiler::ThreadMask<Func::Le>,
- &GLSLDecompiler::ThreadMask<Func::Lt>,
- &GLSLDecompiler::ShuffleIndexed,
-
- &GLSLDecompiler::Barrier,
- &GLSLDecompiler::MemoryBarrierGroup,
- &GLSLDecompiler::MemoryBarrierGlobal,
- };
- static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
-
- std::string GetRegister(u32 index) const {
- return AppendSuffix(index, "gpr");
- }
-
- std::string GetCustomVariable(u32 index) const {
- return AppendSuffix(index, "custom_var");
- }
-
- std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return AppendSuffix(static_cast<u32>(pred), "pred");
- }
-
- std::string GetGenericInputAttribute(Attribute::Index attribute) const {
- return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
- }
-
- std::unordered_map<u8, GenericVaryingDescription> varying_description;
-
- std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
- const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
- const auto& description = varying_description.at(offset);
- if (description.is_scalar) {
- return description.name;
- }
- return fmt::format("{}[{}]", description.name, element - description.first_element);
- }
-
- std::string GetConstBuffer(u32 index) const {
- return AppendSuffix(index, "cbuf");
- }
-
- std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
- }
-
- std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
- return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
- suffix);
- }
-
- std::string GetConstBufferBlock(u32 index) const {
- return AppendSuffix(index, "cbuf_block");
- }
-
- std::string GetLocalMemory() const {
- if (suffix.empty()) {
- return "lmem";
- } else {
- return "lmem_" + std::string{suffix};
- }
- }
-
- std::string GetInternalFlag(InternalFlag flag) const {
- constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
- "overflow_flag"};
- const auto index = static_cast<u32>(flag);
- ASSERT(index < static_cast<u32>(InternalFlag::Amount));
-
- if (suffix.empty()) {
- return InternalFlagNames[index];
- } else {
- return fmt::format("{}_{}", InternalFlagNames[index], suffix);
- }
- }
-
- std::string GetSampler(const SamplerEntry& sampler) const {
- return AppendSuffix(sampler.index, "sampler");
- }
-
- std::string GetImage(const ImageEntry& image) const {
- return AppendSuffix(image.index, "image");
- }
-
- std::string AppendSuffix(u32 index, std::string_view name) const {
- if (suffix.empty()) {
- return fmt::format("{}{}", name, index);
- } else {
- return fmt::format("{}{}_{}", name, index, suffix);
- }
- }
-
- u32 GetNumPhysicalInputAttributes() const {
- return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
- }
-
- u32 GetNumPhysicalAttributes() const {
- return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes);
- }
-
- u32 GetNumPhysicalVaryings() const {
- return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
- }
-
- const Device& device;
- const ShaderIR& ir;
- const Registry& registry;
- const ShaderType stage;
- const std::string_view identifier;
- const std::string_view suffix;
- const Header header;
- std::unordered_map<u8, VaryingTFB> transform_feedback;
-
- ShaderWriter code;
-
- std::optional<u32> max_input_vertices;
-};
-
-std::string GetFlowVariable(u32 index) {
- return fmt::format("flow_var{}", index);
-}
-
-class ExprDecompiler {
-public:
- explicit ExprDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ExprAnd& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " && ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprOr& expr) {
- inner += '(';
- std::visit(*this, *expr.operand1);
- inner += " || ";
- std::visit(*this, *expr.operand2);
- inner += ')';
- }
-
- void operator()(const ExprNot& expr) {
- inner += '!';
- std::visit(*this, *expr.operand1);
- }
-
- void operator()(const ExprPredicate& expr) {
- const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
- inner += decomp.GetPredicate(pred);
- }
-
- void operator()(const ExprCondCode& expr) {
- inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool();
- }
-
- void operator()(const ExprVar& expr) {
- inner += GetFlowVariable(expr.var_index);
- }
-
- void operator()(const ExprBoolean& expr) {
- inner += expr.value ? "true" : "false";
- }
-
- void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
- inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value);
- }
-
- const std::string& GetResult() const {
- return inner;
- }
-
-private:
- GLSLDecompiler& decomp;
- std::string inner;
-};
-
-class ASTDecompiler {
-public:
- explicit ASTDecompiler(GLSLDecompiler& decomp_) : decomp{decomp_} {}
-
- void operator()(const ASTProgram& ast) {
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- }
-
- void operator()(const ASTIfThen& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()(const ASTIfElse& ast) {
- decomp.code.AddLine("else {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
-
- void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTBlockDecoded& ast) {
- decomp.VisitBlock(ast.nodes);
- }
-
- void operator()(const ASTVarSet& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
- }
-
- void operator()(const ASTLabel& ast) {
- decomp.code.AddLine("// Label_{}:", ast.index);
- }
-
- void operator()([[maybe_unused]] const ASTGoto& ast) {
- UNREACHABLE();
- }
-
- void operator()(const ASTDoWhile& ast) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("do {{");
- decomp.code.scope++;
- ASTNode current = ast.nodes.GetFirst();
- while (current) {
- Visit(current);
- current = current->GetNext();
- }
- decomp.code.scope--;
- decomp.code.AddLine("}} while({});", expr_parser.GetResult());
- }
-
- void operator()(const ASTReturn& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- if (ast.kills) {
- decomp.code.AddLine("discard;");
- } else {
- decomp.PreExit();
- decomp.code.AddLine("return;");
- }
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void operator()(const ASTBreak& ast) {
- const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
- if (!is_true) {
- ExprDecompiler expr_parser{decomp};
- std::visit(expr_parser, *ast.condition);
- decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
- decomp.code.scope++;
- }
- decomp.code.AddLine("break;");
- if (!is_true) {
- decomp.code.scope--;
- decomp.code.AddLine("}}");
- }
- }
-
- void Visit(const ASTNode& node) {
- std::visit(*this, *node->GetInnerData());
- }
-
-private:
- GLSLDecompiler& decomp;
-};
-
-void GLSLDecompiler::DecompileAST() {
- const u32 num_flow_variables = ir.GetASTNumVariables();
- for (u32 i = 0; i < num_flow_variables; i++) {
- code.AddLine("bool {} = false;", GetFlowVariable(i));
- }
-
- ASTDecompiler decompiler{*this};
- decompiler.Visit(ir.GetASTProgram());
-}
-
-} // Anonymous namespace
-
-ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
- ShaderEntries entries;
- for (const auto& cbuf : ir.GetConstantBuffers()) {
- entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
- cbuf.first);
- }
- for (const auto& [base, usage] : ir.GetGlobalMemory()) {
- entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
- usage.is_written);
- }
- for (const auto& sampler : ir.GetSamplers()) {
- entries.samplers.emplace_back(sampler);
- }
- for (const auto& image : ir.GetImages()) {
- entries.images.emplace_back(image);
- }
- const auto clip_distances = ir.GetClipDistances();
- for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
- entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
- }
- for (const auto& buffer : entries.const_buffers) {
- entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
- }
- entries.shader_length = ir.GetLength();
- return entries;
-}
-
-std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
- ShaderType stage, std::string_view identifier,
- std::string_view suffix) {
- GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
- decompiler.Decompile();
- return decompiler.GetResult();
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
deleted file mode 100644
index 0397a000c..000000000
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <string>
-#include <string_view>
-#include <utility>
-#include <vector>
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL {
-
-class Device;
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using SamplerEntry = VideoCommon::Shader::SamplerEntry;
-using ImageEntry = VideoCommon::Shader::ImageEntry;
-
-class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
-public:
- explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
- : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
-
- u32 GetIndex() const {
- return index;
- }
-
-private:
- u32 index = 0;
-};
-
-struct GlobalMemoryEntry {
- constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
- bool is_written_)
- : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_read{is_read_}, is_written{
- is_written_} {}
-
- u32 cbuf_index = 0;
- u32 cbuf_offset = 0;
- bool is_read = false;
- bool is_written = false;
-};
-
-struct ShaderEntries {
- std::vector<ConstBufferEntry> const_buffers;
- std::vector<GlobalMemoryEntry> global_memory_entries;
- std::vector<SamplerEntry> samplers;
- std::vector<ImageEntry> images;
- std::size_t shader_length{};
- u32 clip_distances{};
- u32 enabled_uniform_buffers{};
-};
-
-ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage);
-
-std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- const VideoCommon::Shader::Registry& registry,
- Tegra::Engines::ShaderType stage, std::string_view identifier,
- std::string_view suffix = {});
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
deleted file mode 100644
index 0deb86517..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <cstring>
-
-#include <fmt/format.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "common/fs/file.h"
-#include "common/fs/fs.h"
-#include "common/fs/path_util.h"
-#include "common/logging/log.h"
-#include "common/scm_rev.h"
-#include "common/settings.h"
-#include "common/zstd_compression.h"
-#include "core/core.h"
-#include "core/hle/kernel/k_process.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-
-namespace OpenGL {
-
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::BindlessSamplerMap;
-using VideoCommon::Shader::BoundSamplerMap;
-using VideoCommon::Shader::KeyMap;
-using VideoCommon::Shader::SeparateSamplerKey;
-using ShaderCacheVersionHash = std::array<u8, 64>;
-
-struct ConstBufferKey {
- u32 cbuf = 0;
- u32 offset = 0;
- u32 value = 0;
-};
-
-struct BoundSamplerEntry {
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct SeparateSamplerEntry {
- u32 cbuf1 = 0;
- u32 cbuf2 = 0;
- u32 offset1 = 0;
- u32 offset2 = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-struct BindlessSamplerEntry {
- u32 cbuf = 0;
- u32 offset = 0;
- Tegra::Engines::SamplerDescriptor sampler;
-};
-
-namespace {
-
-constexpr u32 NativeVersion = 21;
-
-ShaderCacheVersionHash GetShaderCacheVersionHash() {
- ShaderCacheVersionHash hash{};
- const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
- std::memcpy(hash.data(), Common::g_shader_cache_version, length);
- return hash;
-}
-
-} // Anonymous namespace
-
-ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
-
-ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-
-bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
- if (!file.ReadObject(type)) {
- return false;
- }
- u32 code_size;
- u32 code_size_b;
- if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
- return false;
- }
- code.resize(code_size);
- code_b.resize(code_size_b);
- if (file.Read(code) != code_size) {
- return false;
- }
- if (HasProgramA() && file.Read(code_b) != code_size_b) {
- return false;
- }
-
- u8 is_texture_handler_size_known;
- u32 texture_handler_size_value;
- u32 num_keys;
- u32 num_bound_samplers;
- u32 num_separate_samplers;
- u32 num_bindless_samplers;
- if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
- !file.ReadObject(is_texture_handler_size_known) ||
- !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
- !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
- !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
- !file.ReadObject(num_bindless_samplers)) {
- return false;
- }
- if (is_texture_handler_size_known) {
- texture_handler_size = texture_handler_size_value;
- }
-
- std::vector<ConstBufferKey> flat_keys(num_keys);
- std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
- std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
- std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
- if (file.Read(flat_keys) != flat_keys.size() ||
- file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
- file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
- file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
- return false;
- }
- for (const auto& entry : flat_keys) {
- keys.insert({{entry.cbuf, entry.offset}, entry.value});
- }
- for (const auto& entry : flat_bound_samplers) {
- bound_samplers.emplace(entry.offset, entry.sampler);
- }
- for (const auto& entry : flat_separate_samplers) {
- SeparateSamplerKey key;
- key.buffers = {entry.cbuf1, entry.cbuf2};
- key.offsets = {entry.offset1, entry.offset2};
- separate_samplers.emplace(key, entry.sampler);
- }
- for (const auto& entry : flat_bindless_samplers) {
- bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
- }
-
- return true;
-}
-
-bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
- if (!file.WriteObject(static_cast<u32>(type)) ||
- !file.WriteObject(static_cast<u32>(code.size())) ||
- !file.WriteObject(static_cast<u32>(code_b.size()))) {
- return false;
- }
- if (file.Write(code) != code.size()) {
- return false;
- }
- if (HasProgramA() && file.Write(code_b) != code_b.size()) {
- return false;
- }
-
- if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
- !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
- !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
- !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
- !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
- !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
- return false;
- }
-
- std::vector<ConstBufferKey> flat_keys;
- flat_keys.reserve(keys.size());
- for (const auto& [address, value] : keys) {
- flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
- }
-
- std::vector<BoundSamplerEntry> flat_bound_samplers;
- flat_bound_samplers.reserve(bound_samplers.size());
- for (const auto& [address, sampler] : bound_samplers) {
- flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
- }
-
- std::vector<SeparateSamplerEntry> flat_separate_samplers;
- flat_separate_samplers.reserve(separate_samplers.size());
- for (const auto& [key, sampler] : separate_samplers) {
- SeparateSamplerEntry entry;
- std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
- std::tie(entry.offset1, entry.offset2) = key.offsets;
- entry.sampler = sampler;
- flat_separate_samplers.push_back(entry);
- }
-
- std::vector<BindlessSamplerEntry> flat_bindless_samplers;
- flat_bindless_samplers.reserve(bindless_samplers.size());
- for (const auto& [address, sampler] : bindless_samplers) {
- flat_bindless_samplers.push_back(
- BindlessSamplerEntry{address.first, address.second, sampler});
- }
-
- return file.Write(flat_keys) == flat_keys.size() &&
- file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
- file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
- file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
-}
-
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
-
-ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
-
-void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
- title_id = title_id_;
-}
-
-std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
- // Skip games without title id
- const bool has_title_id = title_id != 0;
- if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
- return std::nullopt;
- }
-
- Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No transferable shader cache found");
- is_usable = true;
- return std::nullopt;
- }
-
- u32 version{};
- if (!file.ReadObject(version)) {
- LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
- return std::nullopt;
- }
-
- if (version < NativeVersion) {
- LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
- file.Close();
- InvalidateTransferable();
- is_usable = true;
- return std::nullopt;
- }
- if (version > NativeVersion) {
- LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
- "of the emulator, skipping");
- return std::nullopt;
- }
-
- // Version is valid, load the shaders
- std::vector<ShaderDiskCacheEntry> entries;
- while (static_cast<u64>(file.Tell()) < file.GetSize()) {
- ShaderDiskCacheEntry& entry = entries.emplace_back();
- if (!entry.Load(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
- return std::nullopt;
- }
- }
-
- is_usable = true;
- return {std::move(entries)};
-}
-
-std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
- if (!is_usable) {
- return {};
- }
-
- Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
- return {};
- }
-
- if (const auto result = LoadPrecompiledFile(file)) {
- return *result;
- }
-
- LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
- file.Close();
- InvalidatePrecompiled();
- return {};
-}
-
-std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
- Common::FS::IOFile& file) {
- // Read compressed file from disk and decompress to virtual precompiled cache file
- std::vector<u8> compressed(file.GetSize());
- if (file.Read(compressed) != file.GetSize()) {
- return std::nullopt;
- }
- const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
- SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
- precompiled_cache_virtual_file_offset = 0;
-
- ShaderCacheVersionHash file_hash{};
- if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
- if (GetShaderCacheVersionHash() != file_hash) {
- LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
- precompiled_cache_virtual_file_offset = 0;
- return std::nullopt;
- }
-
- std::vector<ShaderDiskCachePrecompiled> entries;
- while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
- u32 binary_size;
- auto& entry = entries.emplace_back();
- if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
- !LoadObjectFromPrecompiled(entry.binary_format) ||
- !LoadObjectFromPrecompiled(binary_size)) {
- return std::nullopt;
- }
-
- entry.binary.resize(binary_size);
- if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
- return std::nullopt;
- }
- }
- return entries;
-}
-
-void ShaderDiskCacheOpenGL::InvalidateTransferable() {
- if (!Common::FS::RemoveFile(GetTransferablePath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
- Common::FS::PathToUTF8String(GetTransferablePath()));
- }
- InvalidatePrecompiled();
-}
-
-void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
- // Clear virtaul precompiled cache file
- precompiled_cache_virtual_file.Resize(0);
-
- if (!Common::FS::RemoveFile(GetPrecompiledPath())) {
- LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
- Common::FS::PathToUTF8String(GetPrecompiledPath()));
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
- if (!is_usable) {
- return;
- }
-
- const u64 id = entry.unique_identifier;
- if (stored_transferable.contains(id)) {
- // The shader already exists
- return;
- }
-
- Common::FS::IOFile file = AppendTransferableFile();
- if (!file.IsOpen()) {
- return;
- }
- if (!entry.Save(file)) {
- LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
- file.Close();
- InvalidateTransferable();
- return;
- }
-
- stored_transferable.insert(id);
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
- if (!is_usable) {
- return;
- }
-
- // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
- // when writing the dump. This should be done the moment I get access to write to the virtual
- // file.
- if (precompiled_cache_virtual_file.GetSize() == 0) {
- SavePrecompiledHeaderToVirtualPrecompiledCache();
- }
-
- GLint binary_length;
- glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
-
- GLenum binary_format;
- std::vector<u8> binary(binary_length);
- glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
-
- if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
- !SaveArrayToPrecompiled(binary.data(), binary.size())) {
- LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
- unique_identifier);
- InvalidatePrecompiled();
- }
-}
-
-Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
- if (!EnsureDirectories()) {
- return {};
- }
-
- const auto transferable_path{GetTransferablePath()};
- const bool existed = Common::FS::Exists(transferable_path);
-
- Common::FS::IOFile file{transferable_path, Common::FS::FileAccessMode::Append,
- Common::FS::FileType::BinaryFile};
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- if (!existed || file.GetSize() == 0) {
- // If the file didn't exist, write its version
- if (!file.WriteObject(NativeVersion)) {
- LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
- Common::FS::PathToUTF8String(transferable_path));
- return {};
- }
- }
- return file;
-}
-
-void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
- const auto hash{GetShaderCacheVersionHash()};
- if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
- LOG_ERROR(
- Render_OpenGL,
- "Failed to write precompiled cache version hash to virtual precompiled cache file");
- }
-}
-
-void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
- precompiled_cache_virtual_file_offset = 0;
- const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
- const std::vector<u8> compressed =
- Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
-
- const auto precompiled_path = GetPrecompiledPath();
- Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
- Common::FS::FileType::BinaryFile};
-
- if (!file.IsOpen()) {
- LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- return;
- }
- if (file.Write(compressed) != compressed.size()) {
- LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
- Common::FS::PathToUTF8String(precompiled_path));
- }
-}
-
-bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
- const auto CreateDir = [](const std::filesystem::path& dir) {
- if (!Common::FS::CreateDir(dir)) {
- LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
- Common::FS::PathToUTF8String(dir));
- return false;
- }
- return true;
- };
-
- return CreateDir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)) &&
- CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
- CreateDir(GetPrecompiledDir());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
- return GetTransferableDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
- return GetPrecompiledDir() / fmt::format("{}.bin", GetTitleID());
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
- return GetBaseDir() / "transferable";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
- return GetBaseDir() / "precompiled";
-}
-
-std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
- return Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir) / "opengl";
-}
-
-std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", title_id);
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
deleted file mode 100644
index f8bc23868..000000000
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <filesystem>
-#include <optional>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <glad/glad.h>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/file_sys/vfs_vector.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/registry.h"
-
-namespace Common::FS {
-class IOFile;
-}
-
-namespace OpenGL {
-
-using ProgramCode = std::vector<u64>;
-
-/// Describes a shader and how it's used by the guest GPU
-struct ShaderDiskCacheEntry {
- ShaderDiskCacheEntry();
- ~ShaderDiskCacheEntry();
-
- bool Load(Common::FS::IOFile& file);
-
- bool Save(Common::FS::IOFile& file) const;
-
- bool HasProgramA() const {
- return !code.empty() && !code_b.empty();
- }
-
- Tegra::Engines::ShaderType type{};
- ProgramCode code;
- ProgramCode code_b;
-
- u64 unique_identifier = 0;
- std::optional<u32> texture_handler_size;
- u32 bound_buffer = 0;
- VideoCommon::Shader::GraphicsInfo graphics_info;
- VideoCommon::Shader::ComputeInfo compute_info;
- VideoCommon::Shader::KeyMap keys;
- VideoCommon::Shader::BoundSamplerMap bound_samplers;
- VideoCommon::Shader::SeparateSamplerMap separate_samplers;
- VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
-};
-
-/// Contains an OpenGL dumped binary program
-struct ShaderDiskCachePrecompiled {
- u64 unique_identifier = 0;
- GLenum binary_format = 0;
- std::vector<u8> binary;
-};
-
-class ShaderDiskCacheOpenGL {
-public:
- explicit ShaderDiskCacheOpenGL();
- ~ShaderDiskCacheOpenGL();
-
- /// Binds a title ID for all future operations.
- void BindTitleID(u64 title_id);
-
- /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
- std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
-
- /// Loads current game's precompiled cache. Invalidates on failure.
- std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
-
- /// Removes the transferable (and precompiled) cache file.
- void InvalidateTransferable();
-
- /// Removes the precompiled cache file and clears virtual precompiled cache file.
- void InvalidatePrecompiled();
-
- /// Saves a raw dump to the transferable file. Checks for collisions.
- void SaveEntry(const ShaderDiskCacheEntry& entry);
-
- /// Saves a dump entry to the precompiled file. Does not check for collisions.
- void SavePrecompiled(u64 unique_identifier, GLuint program);
-
- /// Serializes virtual precompiled shader cache file to real file
- void SaveVirtualPrecompiledFile();
-
-private:
- /// Loads the transferable cache. Returns empty on failure.
- std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
- Common::FS::IOFile& file);
-
- /// Opens current game's transferable file and write it's header if it doesn't exist
- Common::FS::IOFile AppendTransferableFile() const;
-
- /// Save precompiled header to precompiled_cache_in_memory
- void SavePrecompiledHeaderToVirtualPrecompiledCache();
-
- /// Create shader disk cache directories. Returns true on success.
- bool EnsureDirectories() const;
-
- /// Gets current game's transferable file path
- std::filesystem::path GetTransferablePath() const;
-
- /// Gets current game's precompiled file path
- std::filesystem::path GetPrecompiledPath() const;
-
- /// Get user's transferable directory path
- std::filesystem::path GetTransferableDir() const;
-
- /// Get user's precompiled directory path
- std::filesystem::path GetPrecompiledDir() const;
-
- /// Get user's shader directory path
- std::filesystem::path GetBaseDir() const;
-
- /// Get current game's title id
- std::string GetTitleID() const;
-
- template <typename T>
- bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
- const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += write_length;
- return write_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
- const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
- data, length, precompiled_cache_virtual_file_offset);
- precompiled_cache_virtual_file_offset += read_length;
- return read_length == sizeof(T) * length;
- }
-
- template <typename T>
- bool SaveObjectToPrecompiled(const T& object) {
- return SaveArrayToPrecompiled(&object, 1);
- }
-
- bool SaveObjectToPrecompiled(bool object) {
- const auto value = static_cast<u8>(object);
- return SaveArrayToPrecompiled(&value, 1);
- }
-
- template <typename T>
- bool LoadObjectFromPrecompiled(T& object) {
- return LoadArrayFromPrecompiled(&object, 1);
- }
-
- // Stores whole precompiled cache which will be read from or saved to the precompiled chache
- // file
- FileSys::VectorVfsFile precompiled_cache_virtual_file;
- // Stores the current offset of the precompiled cache file for IO purposes
- std::size_t precompiled_cache_virtual_file_offset = 0;
-
- // Stored transferable shaders
- std::unordered_set<u64> stored_transferable;
-
- /// Title ID to operate on
- u64 title_id = 0;
-
- // The cache has been loaded at boot
- bool is_usable = false;
-};
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 553e6e8d6..399959afb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,149 +1,3 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
-
-namespace OpenGL {
-
-namespace {
-
-void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
- if (current == old) {
- return;
- }
- if (current == 0) {
- if (enabled) {
- enabled = false;
- glDisable(stage);
- }
- return;
- }
- if (!enabled) {
- enabled = true;
- glEnable(stage);
- }
- glBindProgramARB(stage, current);
-}
-
-} // Anonymous namespace
-
-ProgramManager::ProgramManager(const Device& device)
- : use_assembly_programs{device.UseAssemblyShaders()} {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- } else {
- graphics_pipeline.Create();
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-ProgramManager::~ProgramManager() = default;
-
-void ProgramManager::BindCompute(GLuint program) {
- if (use_assembly_programs) {
- glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
- } else {
- is_graphics_bound = false;
- glUseProgram(program);
- }
-}
-
-void ProgramManager::BindGraphicsPipeline() {
- if (!use_assembly_programs) {
- UpdateSourcePrograms();
- }
-}
-
-void ProgramManager::BindHostPipeline(GLuint pipeline) {
- if (use_assembly_programs) {
- if (geometry_enabled) {
- geometry_enabled = false;
- old_state.geometry = 0;
- glDisable(GL_GEOMETRY_PROGRAM_NV);
- }
- } else {
- if (!is_graphics_bound) {
- glUseProgram(0);
- }
- }
- glBindProgramPipeline(pipeline);
-}
-
-void ProgramManager::RestoreGuestPipeline() {
- if (use_assembly_programs) {
- glBindProgramPipeline(0);
- } else {
- glBindProgramPipeline(graphics_pipeline.handle);
- }
-}
-
-void ProgramManager::BindHostCompute(GLuint program) {
- if (use_assembly_programs) {
- glDisable(GL_COMPUTE_PROGRAM_NV);
- }
- glUseProgram(program);
- is_graphics_bound = false;
-}
-
-void ProgramManager::RestoreGuestCompute() {
- if (use_assembly_programs) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- glUseProgram(0);
- }
-}
-
-void ProgramManager::UseVertexShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
- }
- current_state.vertex = program;
-}
-
-void ProgramManager::UseGeometryShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
- }
- current_state.geometry = program;
-}
-
-void ProgramManager::UseFragmentShader(GLuint program) {
- if (use_assembly_programs) {
- BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
- }
- current_state.fragment = program;
-}
-
-void ProgramManager::UpdateSourcePrograms() {
- if (!is_graphics_bound) {
- is_graphics_bound = true;
- glUseProgram(0);
- }
-
- const GLuint handle = graphics_pipeline.handle;
- const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
- if (current == old) {
- return;
- }
- glUseProgramStages(handle, stage, current);
- };
- update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
- update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
- update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
-
- old_state = current_state;
-}
-
-void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
- const auto& regs = maxwell.regs;
-
- // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
- y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index ad42cce74..d7ef0775d 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,79 +4,142 @@
#pragma once
-#include <cstddef>
+#include <array>
+#include <span>
#include <glad/glad.h>
+#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
-class Device;
-
-/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
-/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
-/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
-/// Not following that rule will cause problems on some AMD drivers.
-struct alignas(16) MaxwellUniformData {
- void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
-
- GLfloat y_direction;
-};
-static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
-static_assert(sizeof(MaxwellUniformData) < 16384,
- "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
-
class ProgramManager {
-public:
- explicit ProgramManager(const Device& device);
- ~ProgramManager();
-
- /// Binds a compute program
- void BindCompute(GLuint program);
-
- /// Updates bound programs.
- void BindGraphicsPipeline();
-
- /// Binds an OpenGL pipeline object unsynchronized with the guest state.
- void BindHostPipeline(GLuint pipeline);
-
- /// Rewinds BindHostPipeline state changes.
- void RestoreGuestPipeline();
-
- /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
- void BindHostCompute(GLuint program);
+ static constexpr size_t NUM_STAGES = 5;
- /// Rewinds BindHostCompute state changes.
- void RestoreGuestCompute();
-
- void UseVertexShader(GLuint program);
- void UseGeometryShader(GLuint program);
- void UseFragmentShader(GLuint program);
-
-private:
- struct PipelineState {
- GLuint vertex = 0;
- GLuint geometry = 0;
- GLuint fragment = 0;
+ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
- /// Update GLSL programs.
- void UpdateSourcePrograms();
-
- OGLPipeline graphics_pipeline;
-
- PipelineState current_state;
- PipelineState old_state;
-
- bool use_assembly_programs = false;
-
- bool is_graphics_bound = true;
+public:
+ explicit ProgramManager(const Device& device) {
+ glCreateProgramPipelines(1, &pipeline.handle);
+ if (device.UseAssemblyShaders()) {
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ }
+ }
+
+ void BindComputeProgram(GLuint program) {
+ glUseProgram(program);
+ is_compute_bound = true;
+ }
+
+ void BindComputeAssemblyProgram(GLuint program) {
+ if (current_assembly_compute_program != program) {
+ current_assembly_compute_program = program;
+ glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+ }
+ UnbindPipeline();
+ }
+
+ void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
+ static constexpr std::array<GLenum, 5> stage_enums{
+ GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
+ GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
+ };
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
+ }
+ }
+ BindPipeline();
+ }
+
+ void BindPresentPrograms(GLuint vertex, GLuint fragment) {
+ if (current_programs[0] != vertex) {
+ current_programs[0] = vertex;
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
+ }
+ if (current_programs[4] != fragment) {
+ current_programs[4] = fragment;
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
+ }
+ glUseProgramStages(
+ pipeline.handle,
+ GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
+ current_programs[1] = 0;
+ current_programs[2] = 0;
+ current_programs[3] = 0;
+
+ if (current_stage_mask != 0) {
+ current_stage_mask = 0;
+ for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
+ glDisable(program_type);
+ }
+ }
+ BindPipeline();
+ }
+
+ void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
+ u32 stage_mask) {
+ const u32 changed_mask = current_stage_mask ^ stage_mask;
+ current_stage_mask = stage_mask;
+
+ if (changed_mask != 0) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (((changed_mask >> stage) & 1) != 0) {
+ if (((stage_mask >> stage) & 1) != 0) {
+ glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ } else {
+ glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ }
+ }
+ }
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) {
+ current_programs[stage] = programs[stage].handle;
+ glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
+ }
+ }
+ UnbindPipeline();
+ }
+
+ void RestoreGuestCompute() {}
- bool vertex_enabled = false;
- bool geometry_enabled = false;
- bool fragment_enabled = false;
+private:
+ void BindPipeline() {
+ if (!is_pipeline_bound) {
+ is_pipeline_bound = true;
+ glBindProgramPipeline(pipeline.handle);
+ }
+ UnbindCompute();
+ }
+
+ void UnbindPipeline() {
+ if (is_pipeline_bound) {
+ is_pipeline_bound = false;
+ glBindProgramPipeline(0);
+ }
+ UnbindCompute();
+ }
+
+ void UnbindCompute() {
+ if (is_compute_bound) {
+ is_compute_bound = false;
+ glUseProgram(0);
+ }
+ }
+
+ OGLPipeline pipeline;
+ bool is_pipeline_bound{};
+ bool is_compute_bound{};
+
+ u32 current_stage_mask = 0;
+ std::array<GLuint, NUM_STAGES> current_programs{};
+ GLuint current_assembly_compute_program = 0;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 4bf0d6090..d432072ad 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -5,57 +5,108 @@
#include <string_view>
#include <vector>
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/settings.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
-namespace {
+static OGLProgram LinkSeparableProgram(GLuint shader) {
+ OGLProgram program;
+ program.handle = glCreateProgram();
+ glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ glAttachShader(program.handle, shader);
+ glLinkProgram(program.handle);
+ if (!Settings::values.renderer_debug) {
+ return program;
+ }
+ GLint link_status{};
+ glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
-std::string_view StageDebugName(GLenum type) {
- switch (type) {
- case GL_VERTEX_SHADER:
- return "vertex";
- case GL_GEOMETRY_SHADER:
- return "geometry";
- case GL_FRAGMENT_SHADER:
- return "fragment";
- case GL_COMPUTE_SHADER:
- return "compute";
+ GLint log_length{};
+ glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return program;
+ }
+ std::string log(log_length, 0);
+ glGetProgramInfoLog(program.handle, log_length, nullptr, log.data());
+ if (link_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
}
- UNIMPLEMENTED();
- return "unknown";
+ return program;
}
-} // Anonymous namespace
+static void LogShader(GLuint shader, std::string_view code = {}) {
+ GLint shader_status{};
+ glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "Failed to build shader");
+ }
+ GLint log_length{};
+ glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+ if (log_length == 0) {
+ return;
+ }
+ std::string log(log_length, 0);
+ glGetShaderInfoLog(shader, log_length, nullptr, log.data());
+ if (shader_status == GL_FALSE) {
+ LOG_ERROR(Render_OpenGL, "{}", log);
+ if (!code.empty()) {
+ LOG_INFO(Render_OpenGL, "\n{}", code);
+ }
+ } else {
+ LOG_WARNING(Render_OpenGL, "{}", log);
+ }
+}
-GLuint LoadShader(std::string_view source, GLenum type) {
- const std::string_view debug_type = StageDebugName(type);
- const GLuint shader_id = glCreateShader(type);
+OGLProgram CreateProgram(std::string_view code, GLenum stage) {
+ OGLShader shader;
+ shader.handle = glCreateShader(stage);
- const GLchar* source_string = source.data();
- const GLint source_length = static_cast<GLint>(source.size());
+ const GLint length = static_cast<GLint>(code.size());
+ const GLchar* const code_ptr = code.data();
+ glShaderSource(shader.handle, 1, &code_ptr, &length);
+ glCompileShader(shader.handle);
+ if (Settings::values.renderer_debug) {
+ LogShader(shader.handle, code);
+ }
+ return LinkSeparableProgram(shader.handle);
+}
- glShaderSource(shader_id, 1, &source_string, &source_length);
- LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
- glCompileShader(shader_id);
+OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
+ OGLShader shader;
+ shader.handle = glCreateShader(stage);
- GLint result = GL_FALSE;
- GLint info_log_length;
- glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result);
- glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
+ glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
+ static_cast<GLsizei>(code.size_bytes()));
+ glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
+ if (Settings::values.renderer_debug) {
+ LogShader(shader.handle);
+ }
+ return LinkSeparableProgram(shader.handle);
+}
- if (info_log_length > 1) {
- std::string shader_error(info_log_length, ' ');
- glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "{}", shader_error);
- } else {
- LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error);
+OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {
+ OGLAssemblyProgram program;
+ glGenProgramsARB(1, &program.handle);
+ glNamedProgramStringEXT(program.handle, target, GL_PROGRAM_FORMAT_ASCII_ARB,
+ static_cast<GLsizei>(code.size()), code.data());
+ if (Settings::values.renderer_debug) {
+ const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
+ if (err && *err) {
+ if (std::strstr(err, "error")) {
+ LOG_CRITICAL(Render_OpenGL, "\n{}", err);
+ LOG_INFO(Render_OpenGL, "\n{}", code);
+ } else {
+ LOG_WARNING(Render_OpenGL, "\n{}", err);
+ }
}
}
- return shader_id;
+ return program;
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 1b770532e..4e1a2a8e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -4,92 +4,23 @@
#pragma once
+#include <span>
#include <string>
+#include <string_view>
#include <vector>
+
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
-namespace OpenGL::GLShader {
-
-/**
- * Utility function to log the source code of a list of shaders.
- * @param shaders The OpenGL shaders whose source we will print.
- */
-template <typename... T>
-void LogShaderSource(T... shaders) {
- auto shader_list = {shaders...};
-
- for (const auto& shader : shader_list) {
- if (shader == 0)
- continue;
-
- GLint source_length;
- glGetShaderiv(shader, GL_SHADER_SOURCE_LENGTH, &source_length);
-
- std::string source(source_length, ' ');
- glGetShaderSource(shader, source_length, nullptr, &source[0]);
- LOG_INFO(Render_OpenGL, "Shader source {}", source);
- }
-}
-
-/**
- * Utility function to create and compile an OpenGL GLSL shader
- * @param source String of the GLSL shader program
- * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
- */
-GLuint LoadShader(std::string_view source, GLenum type);
-
-/**
- * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
- * @param separable_program whether to create a separable program
- * @param shaders ID of shaders to attach to the program
- * @returns Handle of the newly created OpenGL program object
- */
-template <typename... T>
-GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
- // Link the program
- LOG_DEBUG(Render_OpenGL, "Linking program...");
-
- GLuint program_id = glCreateProgram();
-
- ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...);
-
- if (separable_program) {
- glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
- }
- if (hint_retrievable) {
- glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
- }
-
- glLinkProgram(program_id);
-
- // Check the program
- GLint result = GL_FALSE;
- GLint info_log_length;
- glGetProgramiv(program_id, GL_LINK_STATUS, &result);
- glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length);
-
- if (info_log_length > 1) {
- std::string program_error(info_log_length, ' ');
- glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]);
- if (result == GL_TRUE) {
- LOG_DEBUG(Render_OpenGL, "{}", program_error);
- } else {
- LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error);
- }
- }
-
- if (result == GL_FALSE) {
- // There was a problem linking the shader, print the source for debugging purposes.
- LogShaderSource(shaders...);
- }
+namespace OpenGL {
- ASSERT_MSG(result == GL_TRUE, "Shader not linked");
+OGLProgram CreateProgram(std::string_view code, GLenum stage);
- ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...);
+OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
- return program_id;
-}
+OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index dbdf5230f..586da84e3 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -83,11 +83,6 @@ void SetupDirtyScissors(Tables& tables) {
FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
}
-void SetupDirtyShaders(Tables& tables) {
- FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
- Shaders);
-}
-
void SetupDirtyPolygonModes(Tables& tables) {
tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
@@ -217,7 +212,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
SetupDirtyScissors(tables);
SetupDirtyVertexInstances(tables);
SetupDirtyVertexFormat(tables);
- SetupDirtyShaders(tables);
SetupDirtyPolygonModes(tables);
SetupDirtyDepthTest(tables);
SetupDirtyStencilTest(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 94c905116..5864c7c07 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -52,7 +52,6 @@ enum : u8 {
BlendState0,
BlendState7 = BlendState0 + 7,
- Shaders,
ClipDistances,
PolygonModes,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index a2c1599f7..c373c9cb4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,9 +24,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
-
namespace {
-
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
GLsizei depth;
};
-struct FormatTuple {
- GLenum internal_format;
- GLenum format = GL_NONE;
- GLenum type = GL_NONE;
-};
-
-constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
- {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
- {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
- {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
- {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
- {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
- {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
- {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
- {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
- {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
- {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
- {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
- {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
- {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
- {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
- {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
- {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
- {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
- {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
- {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
- {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
- {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
- {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
- {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
- {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
- {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
- {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
- {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
- {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
- {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
- {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
- {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
- {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
- {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
- {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
- {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
- {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
- {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
- {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
- {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
- {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
- {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
- {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
- {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
- {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
- {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
- {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
- {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
- {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
- {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
- {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
- {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
- {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
- {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
- {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
- {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
- {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
- {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
- {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
- {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
- {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
- {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
- {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
- {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
- {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
- {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
- {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
- {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
- {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
- {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
- GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
-}};
-
constexpr std::array ACCELERATED_FORMATS{
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
};
-const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
- ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
- return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
-}
-
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
switch (info.type) {
case ImageType::e1D:
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
return GL_NONE;
}
-GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
const bool is_multisampled = num_samples > 1;
switch (type) {
- case ImageViewType::e1D:
+ case Shader::TextureType::Color1D:
return GL_TEXTURE_1D;
- case ImageViewType::e2D:
+ case Shader::TextureType::Color2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
- case ImageViewType::Cube:
+ case Shader::TextureType::ColorCube:
return GL_TEXTURE_CUBE_MAP;
- case ImageViewType::e3D:
+ case Shader::TextureType::Color3D:
return GL_TEXTURE_3D;
- case ImageViewType::e1DArray:
+ case Shader::TextureType::ColorArray1D:
return GL_TEXTURE_1D_ARRAY;
- case ImageViewType::e2DArray:
+ case Shader::TextureType::ColorArray2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
- case ImageViewType::CubeArray:
+ case Shader::TextureType::ColorArrayCube:
return GL_TEXTURE_CUBE_MAP_ARRAY;
- case ImageViewType::Rect:
- return GL_TEXTURE_RECTANGLE;
- case ImageViewType::Buffer:
+ case Shader::TextureType::Buffer:
return GL_TEXTURE_BUFFER;
}
UNREACHABLE_MSG("Invalid image view type={}", type);
@@ -322,12 +212,13 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
default:
return false;
}
- const GLenum internal_format = GetFormatTuple(info.format).internal_format;
+ const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
if (format_info.is_compressed) {
return false;
}
- if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
+ if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) ==
+ ACCELERATED_FORMATS.end()) {
return false;
}
if (format_info.compatibility_by_size) {
@@ -413,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
- const GLuint texture = image_view->DefaultHandle();
- glNamedFramebufferTexture(fbo, attachment, texture, 0);
+ glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
return;
}
- const GLuint texture = image_view->Handle(ImageViewType::e3D);
+ const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
if (image_view->range.extent.layers > 1) {
// TODO: OpenGL doesn't support rendering to a fixed number of slices
glNamedFramebufferTexture(fbo, attachment, texture, 0);
@@ -438,6 +328,28 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
}
}
+[[nodiscard]] GLenum ShaderFormat(Shader::ImageFormat format) {
+ switch (format) {
+ case Shader::ImageFormat::Typeless:
+ break;
+ case Shader::ImageFormat::R8_SINT:
+ return GL_R8I;
+ case Shader::ImageFormat::R8_UINT:
+ return GL_R8UI;
+ case Shader::ImageFormat::R16_UINT:
+ return GL_R16UI;
+ case Shader::ImageFormat::R16_SINT:
+ return GL_R16I;
+ case Shader::ImageFormat::R32_UINT:
+ return GL_R32UI;
+ case Shader::ImageFormat::R32G32_UINT:
+ return GL_RG32UI;
+ case Shader::ImageFormat::R32G32B32A32_UINT:
+ return GL_RGBA32UI;
+ }
+ UNREACHABLE_MSG("Invalid image format={}", format);
+ return GL_R32UI;
+}
} // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
@@ -452,7 +364,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
- for (const FormatTuple& tuple : FORMAT_TABLE) {
+ for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
const GLenum format = tuple.internal_format;
GLint compat_class;
GLint compat_type;
@@ -474,11 +386,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
null_image_3d.Create(GL_TEXTURE_3D);
- null_image_rect.Create(GL_TEXTURE_RECTANGLE);
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
- glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
std::array<GLuint, 4> new_handles;
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@@ -495,29 +405,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
GL_R8, 0, 1, 0, 6);
const std::array texture_handles{
- null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
- null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
- null_image_view_2d_array.handle, null_image_view_cube.handle,
+ null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
+ null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
+ null_image_view_cube.handle,
};
for (const GLuint handle : texture_handles) {
static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
}
- const auto set_view = [this](ImageViewType type, GLuint handle) {
+ const auto set_view = [this](Shader::TextureType type, GLuint handle) {
if (device.HasDebuggingToolAttached()) {
const std::string name = fmt::format("NullImage {}", type);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
null_image_views[static_cast<size_t>(type)] = handle;
};
- set_view(ImageViewType::e1D, null_image_view_1d.handle);
- set_view(ImageViewType::e2D, null_image_view_2d.handle);
- set_view(ImageViewType::Cube, null_image_view_cube.handle);
- set_view(ImageViewType::e3D, null_image_3d.handle);
- set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
- set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
- set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
- set_view(ImageViewType::Rect, null_image_rect.handle);
+ set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
+ set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
+ set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
+ set_view(Shader::TextureType::Color3D, null_image_3d.handle);
+ set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
+ set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
+ set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -709,7 +618,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
- const auto& tuple = GetFormatTuple(info.format);
+ const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
gl_format = tuple.format;
gl_type = tuple.type;
@@ -749,8 +658,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
- buffer.Create();
- glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
+ UNREACHABLE();
break;
default:
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@@ -788,14 +696,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
}
-void Image::UploadMemory(const ImageBufferMap& map,
- std::span<const VideoCommon::BufferCopy> copies) {
- for (const VideoCommon::BufferCopy& copy : copies) {
- glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
- copy.dst_offset, copy.size);
- }
-}
-
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@@ -957,23 +857,30 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
if (True(image.flags & ImageFlagBits::Converted)) {
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} else {
- internal_format = GetFormatTuple(format).internal_format;
+ internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
+ }
+ full_range = info.range;
+ flat_range = info.range;
+ set_object_label = device.HasDebuggingToolAttached();
+ is_render_target = info.IsRenderTarget();
+ original_texture = image.texture.handle;
+ num_samples = image.info.num_samples;
+ if (!is_render_target) {
+ swizzle[0] = info.x_source;
+ swizzle[1] = info.y_source;
+ swizzle[2] = info.z_source;
+ swizzle[3] = info.w_source;
}
- VideoCommon::SubresourceRange flatten_range = info.range;
- std::array<GLuint, 2> handles;
- stored_views.reserve(2);
-
switch (info.type) {
case ImageViewType::e1DArray:
- flatten_range.extent.layers = 1;
+ flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e1D:
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
+ SetupView(Shader::TextureType::Color1D);
+ SetupView(Shader::TextureType::ColorArray1D);
break;
case ImageViewType::e2DArray:
- flatten_range.extent.layers = 1;
+ flat_range.extent.layers = 1;
[[fallthrough]];
case ImageViewType::e2D:
if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
@@ -983,63 +890,126 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.base = {.level = info.range.base.level, .layer = 0},
.extent = {.levels = 1, .layers = 1},
};
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
- break;
+ full_range = slice_range;
+
+ SetupView(Shader::TextureType::Color3D);
+ } else {
+ SetupView(Shader::TextureType::Color2D);
+ SetupView(Shader::TextureType::ColorArray2D);
}
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
break;
case ImageViewType::e3D:
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
+ SetupView(Shader::TextureType::Color3D);
break;
case ImageViewType::CubeArray:
- flatten_range.extent.layers = 6;
+ flat_range.extent.layers = 6;
[[fallthrough]];
case ImageViewType::Cube:
- glGenTextures(2, handles.data());
- SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
- SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
+ SetupView(Shader::TextureType::ColorCube);
+ SetupView(Shader::TextureType::ColorArrayCube);
break;
case ImageViewType::Rect:
- glGenTextures(1, handles.data());
- SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
+ UNIMPLEMENTED();
break;
case ImageViewType::Buffer:
- glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
- SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
+ UNREACHABLE();
+ break;
+ }
+ switch (info.type) {
+ case ImageViewType::e1D:
+ default_handle = Handle(Shader::TextureType::Color1D);
+ break;
+ case ImageViewType::e1DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray1D);
+ break;
+ case ImageViewType::e2D:
+ default_handle = Handle(Shader::TextureType::Color2D);
+ break;
+ case ImageViewType::e2DArray:
+ default_handle = Handle(Shader::TextureType::ColorArray2D);
+ break;
+ case ImageViewType::e3D:
+ default_handle = Handle(Shader::TextureType::Color3D);
+ break;
+ case ImageViewType::Cube:
+ default_handle = Handle(Shader::TextureType::ColorCube);
+ break;
+ case ImageViewType::CubeArray:
+ default_handle = Handle(Shader::TextureType::ColorArrayCube);
+ break;
+ default:
break;
}
- default_handle = Handle(info.type);
}
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
+ : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
+ buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
+
+ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info)
+ : VideoCommon::ImageViewBase{info, view_info} {}
+
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
-void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
- GLuint handle, const VideoCommon::ImageViewInfo& info,
- VideoCommon::SubresourceRange view_range) {
- if (info.type == ImageViewType::Buffer) {
- // TODO: Take offset from buffer cache
- glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
- image.guest_size_bytes);
- } else {
- const GLuint parent = image.texture.handle;
- const GLenum target = ImageTarget(view_type, image.info.num_samples);
- glTextureView(handle, target, parent, internal_format, view_range.base.level,
- view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
- if (!info.IsRenderTarget()) {
- ApplySwizzle(handle, format, info.Swizzle());
- }
+GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) {
+ if (image_format == Shader::ImageFormat::Typeless) {
+ return Handle(texture_type);
+ }
+ const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
+ image_format == Shader::ImageFormat::R16_SINT};
+ if (!storage_views) {
+ storage_views = std::make_unique<StorageViews>();
+ }
+ auto& type_views{is_signed ? storage_views->signeds : storage_views->unsigneds};
+ GLuint& view{type_views[static_cast<size_t>(texture_type)]};
+ if (view == 0) {
+ view = MakeView(texture_type, ShaderFormat(image_format));
+ }
+ return view;
+}
+
+void ImageView::SetupView(Shader::TextureType view_type) {
+ views[static_cast<size_t>(view_type)] = MakeView(view_type, internal_format);
+}
+
+GLuint ImageView::MakeView(Shader::TextureType view_type, GLenum view_format) {
+ VideoCommon::SubresourceRange view_range;
+ switch (view_type) {
+ case Shader::TextureType::Color1D:
+ case Shader::TextureType::Color2D:
+ case Shader::TextureType::ColorCube:
+ view_range = flat_range;
+ break;
+ case Shader::TextureType::ColorArray1D:
+ case Shader::TextureType::ColorArray2D:
+ case Shader::TextureType::Color3D:
+ case Shader::TextureType::ColorArrayCube:
+ view_range = full_range;
+ break;
+ default:
+ UNREACHABLE();
}
- if (device.HasDebuggingToolAttached()) {
- const std::string name = VideoCommon::Name(*this, view_type);
- glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
+ OGLTextureView& view = stored_views.emplace_back();
+ view.Create();
+
+ const GLenum target = ImageTarget(view_type, num_samples);
+ glTextureView(view.handle, target, original_texture, view_format, view_range.base.level,
+ view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
+ if (!is_render_target) {
+ std::array<SwizzleSource, 4> casted_swizzle;
+ std::ranges::transform(swizzle, casted_swizzle.begin(), [](u8 component_swizzle) {
+ return static_cast<SwizzleSource>(component_swizzle);
+ });
+ ApplySwizzle(view.handle, format, casted_swizzle);
+ }
+ if (set_object_label) {
+ const std::string name = VideoCommon::Name(*this);
+ glObjectLabel(GL_TEXTURE, view.handle, static_cast<GLsizei>(name.size()), name.data());
}
- stored_views.emplace_back().handle = handle;
- views[static_cast<size_t>(view_type)] = handle;
+ return view.handle;
}
Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 25fe61566..921072ebe 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -9,6 +9,7 @@
#include <glad/glad.h>
+#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
@@ -122,18 +123,17 @@ private:
bool has_broken_texture_view_formats = false;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
- StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
+ StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
- OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
};
class Image : public VideoCommon::ImageBase {
@@ -154,8 +154,6 @@ public:
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
- void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
-
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@@ -170,7 +168,6 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
- OGLBuffer buffer;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
@@ -182,10 +179,17 @@ class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
+ const VideoCommon::ImageViewInfo&, GPUVAddr);
+ explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
+ const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
- [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
- return views[static_cast<size_t>(query_type)];
+ [[nodiscard]] GLuint StorageView(Shader::TextureType texture_type,
+ Shader::ImageFormat image_format);
+
+ [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
+ return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
@@ -196,15 +200,38 @@ public:
return internal_format;
}
+ [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
+ return gpu_addr;
+ }
+
+ [[nodiscard]] u32 BufferSize() const noexcept {
+ return buffer_size;
+ }
+
private:
- void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
- const VideoCommon::ImageViewInfo& info,
- VideoCommon::SubresourceRange view_range);
+ struct StorageViews {
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> signeds{};
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> unsigneds{};
+ };
+
+ void SetupView(Shader::TextureType view_type);
+
+ GLuint MakeView(Shader::TextureType view_type, GLenum view_format);
- std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
+ std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
- GLuint default_handle = 0;
+ std::unique_ptr<StorageViews> storage_views;
GLenum internal_format = GL_NONE;
+ GLuint default_handle = 0;
+ GPUVAddr gpu_addr = 0;
+ u32 buffer_size = 0;
+ GLuint original_texture = 0;
+ int num_samples = 0;
+ VideoCommon::SubresourceRange flat_range;
+ VideoCommon::SubresourceRange full_range;
+ std::array<u8, 4> swizzle{};
+ bool set_object_label = false;
+ bool is_render_target = false;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index f7ad8f370..672f94bfc 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -5,12 +5,120 @@
#pragma once
#include <glad/glad.h>
+
#include "video_core/engines/maxwell_3d.h"
+#include "video_core/surface.h"
namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct FormatTuple {
+ GLenum internal_format;
+ GLenum format = GL_NONE;
+ GLenum type = GL_NONE;
+};
+
+constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
+ {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
+ {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
+ {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
+ {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
+ {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
+ {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
+ {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
+ {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
+ {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
+ {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
+ {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
+ {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
+ {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
+ {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
+ {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
+ {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
+ {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
+ {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
+ {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
+ {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
+ {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
+ {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
+ {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
+ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
+ {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
+ {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
+ {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
+ {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
+ {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
+ {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
+ {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
+ {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
+ {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
+ {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
+ {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
+ {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
+ {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
+ {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
+ {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
+ {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
+ {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
+ {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
+ {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
+ {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
+ {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
+ {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
+ {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
+ {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
+ {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
+ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
+ {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
+ {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+ {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+ {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+ {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+ {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+ {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+ {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+ {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+ {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
+ GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
+}};
+
+inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
+ ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+ return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
+}
+
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a718bff7a..f1b00c24c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -25,6 +25,7 @@
#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/textures/decoders.h"
@@ -139,6 +140,26 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
}
AddTelemetryFields();
InitOpenGLObjects();
+
+ // Initialize default attributes to match hardware's disabled attributes
+ GLint max_attribs{};
+ glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_attribs);
+ for (GLint attrib = 0; attrib < max_attribs; ++attrib) {
+ glVertexAttrib4f(attrib, 0.0f, 0.0f, 0.0f, 1.0f);
+ }
+ // Enable seamless cubemaps when per texture parameters are not available
+ if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
+ glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
+ }
+ // Enable unified vertex attributes and query vertex buffer address when the driver supports it
+ if (device.HasVertexBufferUnifiedMemory()) {
+ glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
+ glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
+
+ glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
+ glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
+ &vertex_buffer_address);
+ }
}
RendererOpenGL::~RendererOpenGL() = default;
@@ -229,22 +250,9 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
}
void RendererOpenGL::InitOpenGLObjects() {
- glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
- Settings::values.bg_blue.GetValue(), 0.0f);
-
// Create shader programs
- OGLShader vertex_shader;
- vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
-
- OGLShader fragment_shader;
- fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
-
- vertex_program.Create(true, false, vertex_shader.handle);
- fragment_program.Create(true, false, fragment_shader.handle);
-
- pipeline.Create();
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
+ present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
+ present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
// Generate presentation sampler
present_sampler.Create();
@@ -266,21 +274,6 @@ void RendererOpenGL::InitOpenGLObjects() {
// Clear screen to black
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
-
- // Enable seamless cubemaps when per texture parameters are not available
- if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
- glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
- }
-
- // Enable unified vertex attributes and query vertex buffer address when the driver supports it
- if (device.HasVertexBufferUnifiedMemory()) {
- glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
- glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
-
- glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
- glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
- &vertex_buffer_address);
- }
}
void RendererOpenGL::AddTelemetryFields() {
@@ -335,17 +328,17 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
}
void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
- if (renderer_settings.set_background_color) {
- // Update background color before drawing
- glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
- Settings::values.bg_blue.GetValue(), 0.0f);
- }
+ // Update background color before drawing
+ glClearColor(Settings::values.bg_red.GetValue() / 255.0f,
+ Settings::values.bg_green.GetValue() / 255.0f,
+ Settings::values.bg_blue.GetValue() / 255.0f, 1.0f);
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
- std::data(ortho_matrix));
+ program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
+ glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
+ ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
@@ -406,8 +399,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
- program_manager.BindHostPipeline(pipeline.handle);
-
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@@ -455,7 +446,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- program_manager.RestoreGuestPipeline();
+ // TODO
+ // program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 0b66f8332..d455f572f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,6 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@@ -111,9 +110,8 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
- OGLProgram vertex_program;
- OGLProgram fragment_program;
- OGLPipeline pipeline;
+ OGLProgram present_vertex;
+ OGLProgram present_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index abaf1ee6a..37a4d1d9d 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -16,8 +16,8 @@
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
@@ -41,21 +41,14 @@ using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
using VideoCore::Surface::BytesPerBlock;
namespace {
-
OGLProgram MakeProgram(std::string_view source) {
- OGLShader shader;
- shader.Create(source, GL_COMPUTE_SHADER);
-
- OGLProgram program;
- program.Create(true, false, shader.handle);
- return program;
+ return CreateProgram(source, GL_COMPUTE_SHADER);
}
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
return static_cast<size_t>(copy.extent.width * copy.extent.height *
copy.src_subresource.num_layers);
}
-
} // Anonymous namespace
UtilShaders::UtilShaders(ProgramManager& program_manager_)
@@ -86,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
- program_manager.BindHostCompute(astc_decoder_program.handle);
+ program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@@ -134,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
- program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
+ program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@@ -173,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
- program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
+ program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -222,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
- program_manager.BindHostCompute(pitch_unswizzle_program.handle);
+ program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
@@ -250,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
- program_manager.BindHostCompute(copy_bc4_program.handle);
+ program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
@@ -261,9 +254,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
- copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+ copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
- copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+ copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
}
program_manager.RestoreGuestCompute();
@@ -286,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break;
case 4: {
// BGRA8 copy
- program_manager.BindHostCompute(copy_bgra_program.handle);
+ program_manager.BindComputeProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);