summary refs log tree commit diff stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 50
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 20
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 153
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 749
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 21
6 files changed, 389 insertions, 605 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7b836cc94..2b29fc45f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -108,19 +108,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
- GLint ext_num;
- glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
- for (GLint i = 0; i < ext_num; i++) {
- const std::string_view extension{
- reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))};
-
- if (extension == "GL_ARB_direct_state_access") {
- has_ARB_direct_state_access = true;
- } else if (extension == "GL_ARB_multi_bind") {
- has_ARB_multi_bind = true;
- }
- }
-
OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
@@ -312,6 +299,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
+ std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
@@ -372,12 +360,22 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
primitive_mode, current_texture_bindpoint);
+ // Workaround for Intel drivers.
+ // When a clip distance is enabled but not written by the shader, parts of the screen get
+ // cropped (sometimes half the screen, sometimes three quarters). To avoid this, enable each
+ // clip distance only when it is written by a shader stage.
+ for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) {
+ clip_distances[i] |= shader->GetShaderEntries().clip_distances[i];
+ }
+
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
}
+
+ SyncClipEnabled(clip_distances);
}
void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
@@ -498,7 +496,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
// TODO(bunnei): Figure out how the below register works. According to envytools, this should be
// used to enable multiple render targets. However, it is left unset on all games that I have
// tested.
- ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
+ UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
// Bind the framebuffer surfaces
current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
@@ -680,7 +678,6 @@ void RasterizerOpenGL::DrawArrays() {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest(state);
- SyncClipEnabled();
// Alpha Testing is synced on shaders.
SyncTransformFeedback();
SyncPointState();
@@ -1057,20 +1054,23 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
}
-void RasterizerOpenGL::SyncClipEnabled() {
+void RasterizerOpenGL::SyncClipEnabled(
+ const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
+
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- state.clip_distance[0] = regs.clip_distance_enabled.c0 != 0;
- state.clip_distance[1] = regs.clip_distance_enabled.c1 != 0;
- state.clip_distance[2] = regs.clip_distance_enabled.c2 != 0;
- state.clip_distance[3] = regs.clip_distance_enabled.c3 != 0;
- state.clip_distance[4] = regs.clip_distance_enabled.c4 != 0;
- state.clip_distance[5] = regs.clip_distance_enabled.c5 != 0;
- state.clip_distance[6] = regs.clip_distance_enabled.c6 != 0;
- state.clip_distance[7] = regs.clip_distance_enabled.c7 != 0;
+ const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
+ regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
+ regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
+ regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0,
+ regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0};
+
+ for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
+ state.clip_distance[i] = reg_state[i] && clip_mask[i];
+ }
}
void RasterizerOpenGL::SyncClipCoef() {
- UNREACHABLE();
+ UNIMPLEMENTED();
}
void RasterizerOpenGL::SyncCullMode() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 959e8df63..8a891ffc7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -61,20 +61,6 @@ public:
bool AccelerateDrawBatch(bool is_indexed) override;
void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
- /// OpenGL shader generated for a given Maxwell register state
- struct MaxwellShader {
- /// OpenGL shader resource
- OGLProgram shader;
- };
-
- struct VertexShader {
- OGLShader shader;
- };
-
- struct FragmentShader {
- OGLShader shader;
- };
-
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
@@ -143,7 +129,8 @@ private:
void SyncViewport(OpenGLState& current_state);
/// Syncs the clip enabled status to match the guest state
- void SyncClipEnabled();
+ void SyncClipEnabled(
+ const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
@@ -194,9 +181,6 @@ private:
/// but are needed for correct emulation
void CheckExtensions();
- bool has_ARB_direct_state_access = false;
- bool has_ARB_multi_bind = false;
-
OpenGLState state;
RasterizerCacheOpenGL res_cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index dde2f468d..5f4cdd119 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -405,138 +405,6 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
}
}
-MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
-static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
- GLuint read_fb_handle, GLuint draw_fb_handle, GLenum src_attachment = 0,
- GLenum dst_attachment = 0, std::size_t cubemap_face = 0) {
- MICROPROFILE_SCOPE(OpenGL_BlitSurface);
-
- const auto& src_params{src_surface->GetSurfaceParams()};
- const auto& dst_params{dst_surface->GetSurfaceParams()};
-
- OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({ prev_state.Apply(); });
-
- OpenGLState state;
- state.draw.read_framebuffer = read_fb_handle;
- state.draw.draw_framebuffer = draw_fb_handle;
- // Set sRGB enabled if the destination surfaces need it
- state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
- state.ApplyFramebufferState();
-
- u32 buffers{};
-
- if (src_params.type == SurfaceType::ColorTexture) {
- switch (src_params.target) {
- case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, src_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- case SurfaceTarget::TextureCubemap:
- glFramebufferTexture2D(
- GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
- src_surface->Texture().handle, 0);
- glFramebufferTexture2D(
- GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
- break;
- case SurfaceTarget::Texture2DArray:
- glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- src_surface->Texture().handle, 0, 0);
- glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
- break;
- case SurfaceTarget::Texture3D:
- glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- SurfaceTargetToGL(src_params.target),
- src_surface->Texture().handle, 0, 0);
- glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- SurfaceTargetToGL(src_params.target), 0, 0, 0);
- break;
- default:
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, src_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- }
-
- switch (dst_params.target) {
- case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- case SurfaceTarget::TextureCubemap:
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
- dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(
- GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
- break;
- case SurfaceTarget::Texture2DArray:
- glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- dst_surface->Texture().handle, 0, 0);
- glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
- break;
-
- case SurfaceTarget::Texture3D:
- glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- SurfaceTargetToGL(dst_params.target),
- dst_surface->Texture().handle, 0, 0);
- glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- SurfaceTargetToGL(dst_params.target), 0, 0, 0);
- break;
- default:
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- break;
- }
-
- buffers = GL_COLOR_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- src_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- dst_surface->Texture().handle, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
- buffers = GL_DEPTH_BUFFER_BIT;
- } else if (src_params.type == SurfaceType::DepthStencil) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- src_surface->Texture().handle, 0);
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- dst_surface->Texture().handle, 0);
-
- buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
- }
-
- const auto& rect{src_params.GetRect()};
- glBlitFramebuffer(rect.left, rect.bottom, rect.right, rect.top, rect.left, rect.bottom,
- rect.right, rect.top, buffers,
- buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
-
- return true;
-}
-
static void FastCopySurface(const Surface& src_surface, const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
@@ -841,9 +709,10 @@ void CachedSurface::LoadGLBuffer() {
const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
gl_buffer[0].assign(texture_src_data, texture_src_data_end);
}
- for (u32 i = 0; i < params.max_mip_level; i++)
+ for (u32 i = 0; i < params.max_mip_level; i++) {
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i),
params.MipHeight(i), params.MipDepth(i));
+ }
}
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
@@ -1163,7 +1032,10 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
const Surface& dst_surface) {
const auto& src_params{src_surface->GetSurfaceParams()};
const auto& dst_params{dst_surface->GetSurfaceParams()};
- FlushRegion(src_params.addr, dst_params.MemorySize());
+
+ // Flush enough memory for both the source and destination surface
+ FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize()));
+
LoadSurface(dst_surface);
}
@@ -1189,20 +1061,9 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
return new_surface;
}
- // If the format is the same, just do a framebuffer blit. This is significantly faster than
- // using PBOs. The is also likely less accurate, as textures will be converted rather than
- // reinterpreted. When use_accurate_gpu_emulation setting is enabled, perform a more accurate
- // surface copy, where pixels are reinterpreted as a new format (without conversion). This
- // code path uses OpenGL PBOs and is quite slow.
- const bool is_blit{old_params.pixel_format == new_params.pixel_format};
-
switch (new_params.target) {
case SurfaceTarget::Texture2D:
- if (is_blit) {
- BlitSurface(old_surface, new_surface, read_framebuffer.handle, draw_framebuffer.handle);
- } else {
- CopySurface(old_surface, new_surface, copy_pbo.handle);
- }
+ CopySurface(old_surface, new_surface, copy_pbo.handle);
break;
case SurfaceTarget::Texture3D:
AccurateCopySurface(old_surface, new_surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0c4524d5c..4fc09cac6 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -50,6 +50,14 @@ public:
using std::runtime_error::runtime_error;
};
+/// Generates code to use for a swizzle operation.
+static std::string GetSwizzle(u64 elem) {
+ ASSERT(elem <= 3);
+ std::string swizzle = ".";
+ swizzle += "xyzw"[elem];
+ return swizzle;
+}
+
/// Translate topology
static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -201,14 +209,53 @@ private:
}
};
+template <typename T>
+class ShaderScopedScope {
+public:
+ explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr)
+ : writer(writer), end_expr(std::move(end_expr)) {
+
+ if (begin_expr.empty()) {
+ writer.AddLine('{');
+ } else {
+ writer.AddExpression(begin_expr);
+ writer.AddLine(" {");
+ }
+ ++writer.scope;
+ }
+
+ ShaderScopedScope(const ShaderScopedScope&) = delete;
+
+ ~ShaderScopedScope() {
+ --writer.scope;
+ if (end_expr.empty()) {
+ writer.AddLine('}');
+ } else {
+ writer.AddExpression("} ");
+ writer.AddExpression(end_expr);
+ writer.AddLine(';');
+ }
+ }
+
+ ShaderScopedScope& operator=(const ShaderScopedScope&) = delete;
+
+private:
+ T& writer;
+ std::string end_expr;
+};
+
class ShaderWriter {
public:
- void AddLine(std::string_view text) {
+ void AddExpression(std::string_view text) {
DEBUG_ASSERT(scope >= 0);
if (!text.empty()) {
AppendIndentation();
}
shader_source += text;
+ }
+
+ void AddLine(std::string_view text) {
+ AddExpression(text);
AddNewLine();
}
@@ -228,6 +275,11 @@ public:
return std::move(shader_source);
}
+ ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {},
+ std::string end_expr = {}) {
+ return ShaderScopedScope(*this, begin_expr, end_expr);
+ }
+
int scope = 0;
private:
@@ -311,7 +363,7 @@ public:
// Default - do nothing
return value;
default:
- UNIMPLEMENTED_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+ UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
}
}
@@ -525,6 +577,7 @@ public:
((header.vtg.clip_distances >> index) & 1) == 0,
"Shader is setting gl_ClipDistance{} without enabling it in the header", index);
+ clip_distances[index] = true;
fixed_pipeline_output_attributes_used.insert(attribute);
shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';');
break;
@@ -602,6 +655,11 @@ public:
return used_samplers;
}
+ /// Returns an array of the used clip distances.
+ const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const {
+ return clip_distances;
+ }
+
/// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
/// necessary.
std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
@@ -810,14 +868,12 @@ private:
}
if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
+
// This avoids optimizations of constant propagation and keeps the code as the original
// Sadly using the precise keyword causes "linking" errors on fragment shaders.
shader.AddLine("precise float tmp = " + src + ';');
shader.AddLine(dest + " = tmp;");
- --shader.scope;
- shader.AddLine('}');
} else {
shader.AddLine(dest + " = " + src + ';');
}
@@ -956,14 +1012,6 @@ private:
}
}
- /// Generates code to use for a swizzle operation.
- static std::string GetSwizzle(u64 elem) {
- ASSERT(elem <= 3);
- std::string swizzle = ".";
- swizzle += "xyzw"[elem];
- return swizzle;
- }
-
ShaderWriter& shader;
ShaderWriter& declarations;
std::vector<GLSLRegister> regs;
@@ -975,6 +1023,7 @@ private:
const std::string& suffix;
const Tegra::Shader::Header& header;
std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
+ std::array<bool, Maxwell::NumClipDistances> clip_distances{};
u64 local_memory_size;
};
@@ -997,7 +1046,8 @@ public:
/// Returns entries in the shader that are useful for external functions
ShaderEntries GetEntries() const {
- return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), shader_length};
+ return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(),
+ shader_length};
}
private:
@@ -1293,15 +1343,7 @@ private:
regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
}
- void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
- const std::string& texture) {
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
- shader.AddLine('{');
- ++shader.scope;
- shader.AddLine(coord);
- shader.AddLine("vec4 texture_tmp = " + texture + ';');
-
+ void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) {
// TEXS has two destination registers and a swizzle. The first two elements in the swizzle
// go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
@@ -1313,19 +1355,49 @@ private:
if (written_components < 2) {
// Write the first two swizzle components to gpr0 and gpr0+1
- regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false,
+ regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false,
written_components % 2);
} else {
ASSERT(instr.texs.HasTwoDestinations());
// Write the rest of the swizzle components to gpr28 and gpr28+1
- regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false,
+ regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false,
written_components % 2);
}
++written_components;
}
- --shader.scope;
- shader.AddLine('}');
+ }
+
+ void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) {
+ // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
+ // float instruction).
+
+ std::array<std::string, 4> components;
+ u32 written_components = 0;
+
+ for (u32 component = 0; component < 4; ++component) {
+ if (!instr.texs.IsComponentEnabled(component))
+ continue;
+ components[written_components++] = texture + GetSwizzle(component);
+ }
+ if (written_components == 0)
+ return;
+
+ const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) {
+ return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')';
+ };
+
+ regs.SetRegisterToHalfFloat(
+ instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1),
+ Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+
+ if (written_components > 2) {
+ ASSERT(instr.texs.HasTwoDestinations());
+ regs.SetRegisterToHalfFloat(
+ instr.gpr28, 0,
+ BuildComponent(components[2], components[3], written_components > 3),
+ Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+ }
}
static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
@@ -1348,12 +1420,10 @@ private:
* top.
*/
void EmitPushToFlowStack(u32 target) {
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
+
shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;");
shader.AddLine("flow_stack_top++;");
- --shader.scope;
- shader.AddLine('}');
}
/*
@@ -1361,13 +1431,11 @@ private:
* popped address and decrementing the stack top.
*/
void EmitPopFromFlowStack() {
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
+
shader.AddLine("flow_stack_top--;");
shader.AddLine("jmp_to = flow_stack[flow_stack_top];");
shader.AddLine("break;");
- --shader.scope;
- shader.AddLine('}');
}
/// Writes the output values from a fragment shader to the corresponding GLSL output variables.
@@ -1479,6 +1547,161 @@ private:
}
}
+ std::pair<size_t, std::string> ValidateAndGetCoordinateElement(
+ const Tegra::Shader::TextureType texture_type, const bool depth_compare,
+ const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) {
+ const size_t coord_count = TextureCoordinates(texture_type);
+
+ size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
+ const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
+ if (total_coord_count > max_coords || total_reg_count > max_inputs) {
+ UNIMPLEMENTED_MSG("Unsupported Texture operation");
+ total_coord_count = std::min(total_coord_count, max_coords);
+ }
+ // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later.
+ total_coord_count +=
+ (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D)
+ ? 1
+ : 0;
+
+ constexpr std::array<const char*, 5> coord_container{
+ {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(",
+ "vec4 coord = vec4("}};
+
+ return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]);
+ }
+
+ std::string GetTextureCode(const Tegra::Shader::Instruction& instr,
+ const Tegra::Shader::TextureType texture_type,
+ const Tegra::Shader::TextureProcessMode process_mode,
+ const bool depth_compare, const bool is_array,
+ const size_t bias_offset) {
+
+ if ((texture_type == Tegra::Shader::TextureType::Texture3D &&
+ (is_array || depth_compare)) ||
+ (texture_type == Tegra::Shader::TextureType::TextureCube && is_array &&
+ depth_compare)) {
+ UNIMPLEMENTED_MSG("This method is not supported.");
+ }
+
+ const std::string sampler =
+ GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+ const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ ||
+ process_mode == Tegra::Shader::TextureProcessMode::LL ||
+ process_mode == Tegra::Shader::TextureProcessMode::LLA;
+
+ const bool gl_lod_supported = !(
+ (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
+ (texture_type == Tegra::Shader::TextureType::TextureCube && !is_array &&
+ depth_compare));
+
+ const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture(";
+ std::string texture = read_method + sampler + ", coord";
+
+ if (process_mode != Tegra::Shader::TextureProcessMode::None) {
+ if (process_mode == Tegra::Shader::TextureProcessMode::LZ) {
+ if (gl_lod_supported) {
+ texture += ", 0";
+ } else {
+ // Lod 0 is emulated by a big negative bias
+ // in scenarios that are not supported by glsl
+ texture += ", -1000";
+ }
+ } else {
+ // If present, lod or bias is always stored in the register indexed by the
+ // gpr20 field, with an offset depending on the usage of the other
+ // registers
+ texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset);
+ }
+ }
+ texture += ")";
+ return texture;
+ }
+
+ std::pair<std::string, std::string> GetTEXCode(
+ const Instruction& instr, const Tegra::Shader::TextureType texture_type,
+ const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
+ const bool is_array) {
+ const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
+ process_mode != Tegra::Shader::TextureProcessMode::LZ);
+
+ const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
+ texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+ // If arrays are enabled, the array index is always stored in the gpr8 field
+ const u64 array_register = instr.gpr8.Value();
+ // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+ const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+ std::string coord = coord_dcl;
+ for (size_t i = 0; i < coord_count;) {
+ coord += regs.GetRegisterAsFloat(coord_register + i);
+ ++i;
+ if (i != coord_count) {
+ coord += ',';
+ }
+ }
+ // For 1D.DC in OpenGL, the 2nd component is ignored.
+ if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) {
+ coord += ",0.0";
+ }
+ if (depth_compare) {
+ // Depth is always stored in the register signaled by gpr20
+ // or in the next register if lod or bias are used
+ const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+ coord += ',' + regs.GetRegisterAsFloat(depth_register);
+ }
+ if (is_array) {
+ coord += ',' + regs.GetRegisterAsInteger(array_register);
+ }
+ coord += ");";
+ return std::make_pair(
+ coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0));
+ }
+
+ std::pair<std::string, std::string> GetTEXSCode(
+ const Instruction& instr, const Tegra::Shader::TextureType texture_type,
+ const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
+ const bool is_array) {
+ const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
+ process_mode != Tegra::Shader::TextureProcessMode::LZ);
+
+ const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
+ texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+ // If arrays are enabled, the array index is always stored in the gpr8 field
+ const u64 array_register = instr.gpr8.Value();
+ // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
+ const u64 coord_register = array_register + (is_array ? 1 : 0);
+ const u64 last_coord_register =
+ (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
+ ? static_cast<u64>(instr.gpr20.Value())
+ : coord_register + 1;
+
+ std::string coord = coord_dcl;
+ for (size_t i = 0; i < coord_count; ++i) {
+ const bool last = (i == (coord_count - 1)) && (coord_count > 1);
+ coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i);
+ if (!last) {
+ coord += ',';
+ }
+ }
+
+ if (depth_compare) {
+ // Depth is always stored in the register signaled by gpr20
+ // or in the next register if lod or bias are used
+ const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+ coord += ',' + regs.GetRegisterAsFloat(depth_register);
+ }
+ if (is_array) {
+ coord += ',' + regs.GetRegisterAsInteger(array_register);
+ }
+ coord += ");";
+
+ return std::make_pair(coord,
+ GetTextureCode(instr, texture_type, process_mode, depth_compare,
+ is_array, (coord_count > 2 ? 1 : 0)));
+ }
+
/**
* Compiles a single instruction from Tegra to GLSL.
* @param offset the offset of the Tegra shader instruction.
@@ -2279,8 +2502,7 @@ private:
UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
-
- std::string op_a{};
+ std::string op_a;
if (instr.is_b_gpr) {
op_a =
@@ -2436,10 +2658,7 @@ private:
case OpCode::Id::LD_C: {
UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
- // Add an extra scope and declare the index register inside to prevent
- // overwriting it in case it is used as an output of the LD instruction.
- shader.AddLine("{");
- ++shader.scope;
+ const auto scope = shader.Scope();
shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
" / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
@@ -2465,19 +2684,13 @@ private:
UNIMPLEMENTED_MSG("Unhandled type: {}",
static_cast<unsigned>(instr.ld_c.type.Value()));
}
-
- --shader.scope;
- shader.AddLine("}");
break;
}
case OpCode::Id::LD_L: {
UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
static_cast<unsigned>(instr.ld_l.unknown.Value()));
- // Add an extra scope and declare the index register inside to prevent
- // overwriting it in case it is used as an output of the LD instruction.
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
std::to_string(instr.smem_imm.Value()) + ')';
@@ -2494,9 +2707,6 @@ private:
UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
static_cast<unsigned>(instr.ldst_sl.type.Value()));
}
-
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::ST_A: {
@@ -2531,10 +2741,7 @@ private:
UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
static_cast<unsigned>(instr.st_l.unknown.Value()));
- // Add an extra scope and declare the index register inside to prevent
- // overwriting it in case it is used as an output of the LD instruction.
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
std::to_string(instr.smem_imm.Value()) + ')';
@@ -2549,179 +2756,28 @@ private:
UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
static_cast<unsigned>(instr.ldst_sl.type.Value()));
}
-
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TEX: {
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
- std::string coord;
const bool is_array = instr.tex.array != 0;
-
+ const bool depth_compare =
+ instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
+ const auto process_mode = instr.tex.GetTextureProcessMode();
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- const bool depth_compare =
- instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
- u32 num_coordinates = TextureCoordinates(texture_type);
- u32 start_index = 0;
- std::string array_elem;
- if (is_array) {
- array_elem = regs.GetRegisterAsInteger(instr.gpr8);
- start_index = 1;
- }
- const auto process_mode = instr.tex.GetTextureProcessMode();
- u32 start_index_b = 0;
- std::string lod_value;
- if (process_mode != Tegra::Shader::TextureProcessMode::LZ &&
- process_mode != Tegra::Shader::TextureProcessMode::None) {
- start_index_b = 1;
- lod_value = regs.GetRegisterAsFloat(instr.gpr20);
- }
-
- std::string depth_value;
- if (depth_compare) {
- depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b);
- }
+ const auto [coord, texture] =
+ GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array);
- bool depth_compare_extra = false;
-
- switch (num_coordinates) {
- case 1: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
- if (is_array) {
- if (depth_compare) {
- coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " +
- array_elem + ");";
- } else {
- coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");";
- }
- } else {
- if (depth_compare) {
- coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");";
- } else {
- coord = "float coords = " + x + ';';
- }
- }
- break;
- }
- case 2: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
- if (is_array) {
- if (depth_compare) {
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value +
- ", " + array_elem + ");";
- } else {
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");";
- }
- } else {
- if (depth_compare) {
- coord =
- "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");";
- } else {
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
- }
- }
- break;
- }
- case 3: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
- const std::string z =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
- if (is_array) {
- depth_compare_extra = depth_compare;
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
- array_elem + ");";
- } else {
- if (depth_compare) {
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
- depth_value + ");";
- } else {
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
- }
- }
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
- static_cast<u32>(num_coordinates));
-
- // Fallback to interpreting as a 2D texture for now
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
- texture_type = Tegra::Shader::TextureType::Texture2D;
- }
-
- const std::string sampler =
- GetSampler(instr.sampler, texture_type, is_array, depth_compare);
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
-
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
shader.AddLine(coord);
- std::string texture;
- switch (instr.tex.GetTextureProcessMode()) {
- case Tegra::Shader::TextureProcessMode::None: {
- if (!depth_compare_extra) {
- texture = "texture(" + sampler + ", coords)";
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- break;
- }
- case Tegra::Shader::TextureProcessMode::LZ: {
- if (!depth_compare_extra) {
- texture = "textureLod(" + sampler + ", coords, 0.0)";
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- break;
- }
- case Tegra::Shader::TextureProcessMode::LB:
- case Tegra::Shader::TextureProcessMode::LBA: {
- // TODO: Figure if A suffix changes the equation at all.
- if (!depth_compare_extra) {
- texture = "texture(" + sampler + ", coords, " + lod_value + ')';
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- LOG_WARNING(HW_GPU,
- "OpenGL Limitation: can't set bias value along depth compare");
- }
- break;
- }
- case Tegra::Shader::TextureProcessMode::LL:
- case Tegra::Shader::TextureProcessMode::LLA: {
- // TODO: Figure if A suffix changes the equation at all.
- if (!depth_compare_extra) {
- texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- LOG_WARNING(HW_GPU,
- "OpenGL Limitation: can't set lod value along depth compare");
- }
- break;
- }
- default: {
- if (!depth_compare_extra) {
- texture = "texture(" + sampler + ", coords)";
- } else {
- texture = "texture(" + sampler + ", coords, " + depth_value + ')';
- }
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.tex.GetTextureProcessMode()));
- }
- }
- if (!depth_compare) {
+ if (depth_compare) {
+ regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
+ } else {
shader.AddLine("vec4 texture_tmp = " + texture + ';');
std::size_t dest_elem{};
for (std::size_t elem = 0; elem < 4; ++elem) {
@@ -2733,138 +2789,36 @@ private:
dest_elem);
++dest_elem;
}
- } else {
- regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
}
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TEXS: {
Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
- bool is_array{instr.texs.IsArrayTexture()};
+ const bool is_array{instr.texs.IsArrayTexture()};
+ const bool depth_compare =
+ instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
+ const auto process_mode = instr.texs.GetTextureProcessMode();
UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
- const bool depth_compare =
- instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
- u32 num_coordinates = TextureCoordinates(texture_type);
- const auto process_mode = instr.texs.GetTextureProcessMode();
- std::string lod_value;
- std::string coord;
- u32 lod_offset = 0;
- if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
- if (num_coordinates > 2) {
- lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
- lod_offset = 2;
- } else {
- lod_value = regs.GetRegisterAsFloat(instr.gpr20);
- lod_offset = 1;
- }
- }
+ const auto scope = shader.Scope();
- switch (num_coordinates) {
- case 1: {
- coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';';
- break;
- }
- case 2: {
- if (is_array) {
- if (depth_compare) {
- const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
- coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
- ");";
- } else {
- const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
- }
- } else {
- if (lod_offset != 0) {
- if (depth_compare) {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string z =
- regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
- } else {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
- }
- } else {
- if (depth_compare) {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y =
- regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
- } else {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
- }
- }
- }
- break;
- }
- case 3: {
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
- static_cast<u32>(num_coordinates));
+ auto [coord, texture] =
+ GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array);
- // Fallback to interpreting as a 2D texture for now
- const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
- const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
- texture_type = Tegra::Shader::TextureType::Texture2D;
- is_array = false;
- }
- const std::string sampler =
- GetSampler(instr.sampler, texture_type, is_array, depth_compare);
- std::string texture;
- switch (process_mode) {
- case Tegra::Shader::TextureProcessMode::None: {
- texture = "texture(" + sampler + ", coords)";
- break;
- }
- case Tegra::Shader::TextureProcessMode::LZ: {
- if (depth_compare && is_array) {
- texture = "texture(" + sampler + ", coords)";
- } else {
- texture = "textureLod(" + sampler + ", coords, 0.0)";
- }
- break;
- }
- case Tegra::Shader::TextureProcessMode::LL: {
- texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
- break;
- }
- default: {
- texture = "texture(" + sampler + ", coords)";
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.texs.GetTextureProcessMode()));
- }
+ shader.AddLine(coord);
+
+ if (depth_compare) {
+ texture = "vec4(" + texture + ')';
}
- if (!depth_compare) {
- WriteTexsInstruction(instr, coord, texture);
+ shader.AddLine("vec4 texture_tmp = " + texture + ';');
+
+ if (instr.texs.fp32_flag) {
+ WriteTexsInstructionFloat(instr, "texture_tmp");
} else {
- WriteTexsInstruction(instr, coord, "vec4(" + texture + ')');
+ WriteTexsInstructionHalfFloat(instr, "texture_tmp");
}
-
break;
}
case OpCode::Id::TLDS: {
@@ -2883,15 +2837,12 @@ private:
u32 extra_op_offset = 0;
- // Scope to avoid variable name overlaps.
- shader.AddLine('{');
- ++shader.scope;
- std::string coords;
+ ShaderScopedScope scope = shader.Scope();
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D: {
const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
- coords = "float coords = " + x + ';';
+ shader.AddLine("float coords = " + x + ';');
break;
}
case Tegra::Shader::TextureType::Texture2D: {
@@ -2900,7 +2851,7 @@ private:
const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
// shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
- coords = "ivec2 coords = ivec2(" + x + ", " + y + ");";
+ shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
extra_op_offset = 1;
break;
}
@@ -2909,35 +2860,29 @@ private:
}
const std::string sampler =
GetSampler(instr.sampler, texture_type, is_array, false);
- std::string texture = "texelFetch(" + sampler + ", coords, 0)";
- switch (instr.tlds.GetTextureProcessMode()) {
- case Tegra::Shader::TextureProcessMode::LZ: {
- texture = "texelFetch(" + sampler + ", coords, 0)";
- break;
- }
- case Tegra::Shader::TextureProcessMode::LL: {
- shader.AddLine(
- "float lod = " +
- regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';');
- texture = "texelFetch(" + sampler + ", coords, lod)";
- break;
- }
- default: {
- texture = "texelFetch(" + sampler + ", coords, 0)";
- UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
- static_cast<u32>(instr.tlds.GetTextureProcessMode()));
- }
- }
- WriteTexsInstruction(instr, coords, texture);
- --shader.scope;
- shader.AddLine('}');
+ const std::string texture = [&]() {
+ switch (instr.tlds.GetTextureProcessMode()) {
+ case Tegra::Shader::TextureProcessMode::LZ:
+ return "texelFetch(" + sampler + ", coords, 0)";
+ case Tegra::Shader::TextureProcessMode::LL:
+ shader.AddLine(
+ "float lod = " +
+ regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';');
+ return "texelFetch(" + sampler + ", coords, lod)";
+ default:
+ UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
+ static_cast<u32>(instr.tlds.GetTextureProcessMode()));
+ return "texelFetch(" + sampler + ", coords, 0)";
+ }
+ }();
+
+ WriteTexsInstructionFloat(instr, texture);
break;
}
case OpCode::Id::TLD4: {
ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
ASSERT(instr.tld4.array == 0);
- std::string coord;
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
@@ -2954,10 +2899,7 @@ private:
if (depth_compare)
num_coordinates += 1;
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
- shader.AddLine('{');
- ++shader.scope;
+ const auto scope = shader.Scope();
switch (num_coordinates) {
case 2: {
@@ -2988,23 +2930,19 @@ private:
const std::string texture = "textureGather(" + sampler + ", coords, " +
std::to_string(instr.tld4.component) + ')';
- if (!depth_compare) {
- shader.AddLine("vec4 texture_tmp = " + texture + ';');
+ if (depth_compare) {
+ regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
+ } else {
std::size_t dest_elem{};
for (std::size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
}
- regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
- dest_elem);
+ regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem);
++dest_elem;
}
- } else {
- regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
}
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TLD4S: {
@@ -3015,10 +2953,7 @@ private:
instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
"AOFFI is not implemented");
- // Scope to avoid variable name overlaps.
- shader.AddLine('{');
- ++shader.scope;
- std::string coords;
+ const auto scope = shader.Scope();
const bool depth_compare =
instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
@@ -3027,33 +2962,30 @@ private:
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
const std::string sampler = GetSampler(
instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
- if (!depth_compare) {
- coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
- } else {
+ if (depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");";
+ shader.AddLine("vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");");
+ } else {
+ shader.AddLine("vec2 coords = vec2(" + op_a + ", " + op_b + ");");
}
- const std::string texture = "textureGather(" + sampler + ", coords, " +
- std::to_string(instr.tld4s.component) + ')';
- if (!depth_compare) {
- WriteTexsInstruction(instr, coords, texture);
- } else {
- WriteTexsInstruction(instr, coords, "vec4(" + texture + ')');
+ std::string texture = "textureGather(" + sampler + ", coords, " +
+ std::to_string(instr.tld4s.component) + ')';
+ if (depth_compare) {
+ texture = "vec4(" + texture + ')';
}
- --shader.scope;
- shader.AddLine('}');
+ WriteTexsInstructionFloat(instr, texture);
break;
}
case OpCode::Id::TXQ: {
UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
"NODEP is not implemented");
- ++shader.scope;
- shader.AddLine('{');
- // TODO: the new commits on the texture refactor, change the way samplers work.
+ const auto scope = shader.Scope();
+
+ // TODO: The new commits on the texture refactor, change the way samplers work.
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const std::string sampler =
@@ -3064,7 +2996,8 @@ private:
regs.GetRegisterAsInteger(instr.gpr8) + ')';
const std::string mip_level = "textureQueryLevels(" + sampler + ')';
shader.AddLine("ivec2 sizes = " + texture + ';');
- regs.SetRegisterToInteger(instr.gpr0, true, 0, "sizes.x", 1, 1);
+
+ regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1);
@@ -3075,8 +3008,6 @@ private:
static_cast<u32>(instr.txq.query_type.Value()));
}
}
- --shader.scope;
- shader.AddLine('}');
break;
}
case OpCode::Id::TMML: {
@@ -3091,17 +3022,18 @@ private:
const std::string sampler =
GetSampler(instr.sampler, texture_type, is_array, false);
- // TODO: add coordinates for different samplers once other texture types are
+ const auto scope = shader.Scope();
+
+ // TODO: Add coordinates for different samplers once other texture types are
// implemented.
- std::string coord;
switch (texture_type) {
case Tegra::Shader::TextureType::Texture1D: {
- coord = "float coords = " + x + ';';
+ shader.AddLine("float coords = " + x + ';');
break;
}
case Tegra::Shader::TextureType::Texture2D: {
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
break;
}
default:
@@ -3109,22 +3041,15 @@ private:
// Fallback to interpreting as a 2D texture for now
const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
- coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+ shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
texture_type = Tegra::Shader::TextureType::Texture2D;
}
- // Add an extra scope and declare the texture coords inside to prevent
- // overwriting them in case they are used as outputs of the texs instruction.
- shader.AddLine('{');
- ++shader.scope;
- shader.AddLine(coord);
+
const std::string texture = "textureQueryLod(" + sampler + ", coords)";
- const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
- shader.AddLine(tmp);
+ shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);");
regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
- --shader.scope;
- shader.AddLine('}');
break;
}
default: {
@@ -3963,4 +3888,4 @@ std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u
return {};
}
-} // namespace OpenGL::GLShader::Decompiler
+} // namespace OpenGL::GLShader::Decompiler \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index b425d98ae..4fa6d7612 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -163,6 +163,7 @@ private:
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffer_entries;
std::vector<SamplerEntry> texture_samplers;
+ std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances;
std::size_t shader_length;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index b757f5f44..4970aafed 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -60,6 +60,17 @@ public:
}
void ApplyTo(OpenGLState& state) {
+ UpdatePipeline();
+ state.draw.shader_program = 0;
+ state.draw.program_pipeline = pipeline.handle;
+ state.geometry_shaders.enabled = (gs != 0);
+ }
+
+private:
+ void UpdatePipeline() {
+ // Avoid updating the pipeline when values have not changed
+ if (old_vs == vs && old_fs == fs && old_gs == gs)
+ return;
// Workaround for AMD bug
glUseProgramStages(pipeline.handle,
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
@@ -68,14 +79,16 @@ public:
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs);
glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs);
- state.draw.shader_program = 0;
- state.draw.program_pipeline = pipeline.handle;
- state.geometry_shaders.enabled = (gs != 0);
+
+ // Update the old values
+ old_vs = vs;
+ old_fs = fs;
+ old_gs = gs;
}
-private:
OGLPipeline pipeline;
GLuint vs{}, fs{}, gs{};
+ GLuint old_vs{}, old_fs{}, old_gs{};
};
} // namespace OpenGL::GLShader