summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp70
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp116
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h31
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp140
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h92
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp53
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp13
9 files changed, 358 insertions, 211 deletions
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7992b82c4..c7f32feaa 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,8 +4,13 @@
#include <glad/glad.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
@@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{
LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
}
+void CachedGlobalRegion::Reload(u32 size_) {
+ constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
+
+ size = size_;
+ if (size > max_size) {
+ size = max_size;
+ LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
+ max_size);
+ }
+
+ // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
+}
+
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
+ const auto search{reserve.find(addr)};
+ if (search == reserve.end()) {
+ return {};
+ }
+ return search->second;
+}
+
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
+ GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
+ if (!region) {
+ // No reserved surface available, create a new one and reserve it
+ region = std::make_shared<CachedGlobalRegion>(addr, size);
+ ReserveGlobalRegion(region);
+ }
+ region->Reload(size);
+ return region;
+}
+
+void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
+ reserve[region->GetAddr()] = region;
+}
+
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {}
+GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
+ const GLShader::GlobalMemoryEntry& global_region,
+ Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
+
+ auto& gpu{Core::System::GetInstance().GPU()};
+ const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
+ const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
+ cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
+ ASSERT(cbuf_addr);
+
+ const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
+ const auto size = Memory::Read32(*cbuf_addr + 8);
+ const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
+ ASSERT(actual_addr);
+
+ // Look up global region in the cache based on address
+ GlobalRegion region = TryGet(*actual_addr);
+
+ if (!region) {
+ // No global region found - create a new one
+ region = GetUncachedGlobalRegion(*actual_addr, size);
+ Register(region);
+ }
+
+ return region;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 406a735bc..37830bb7c 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -5,9 +5,13 @@
#pragma once
#include <memory>
+#include <unordered_map>
+
#include <glad/glad.h>
+#include "common/assert.h"
#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -40,6 +44,9 @@ public:
return buffer.handle;
}
+ /// Reloads the global region from guest memory
+ void Reload(u32 size_);
+
// TODO(Rodrigo): When global memory is written (STG), implement flushing
void Flush() override {
UNIMPLEMENTED();
@@ -55,6 +62,17 @@ private:
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
+
+ /// Gets the current specified shader stage program
+ GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
+ Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
+
+private:
+ GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
+ GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
+ void ReserveGlobalRegion(const GlobalRegion& region);
+
+ std::unordered_map<VAddr, GlobalRegion> reserve;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 71829fee0..ee313cb2f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -297,10 +297,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
- // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
- // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
- u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
- u32 current_texture_bindpoint = 0;
+ BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -324,43 +321,35 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
- // Bind the buffer
- glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(),
- offset, static_cast<GLsizeiptr>(sizeof(ubo)));
+ // Bind the emulation info buffer
+ glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
+ static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)};
+ const auto [program_handle, next_bindings] =
+ shader->GetProgramHandle(primitive_mode, base_bindings);
switch (program) {
case Maxwell::ShaderProgram::VertexA:
- case Maxwell::ShaderProgram::VertexB: {
- shader_program_manager->UseProgrammableVertexShader(
- shader->GetProgramHandle(primitive_mode));
+ case Maxwell::ShaderProgram::VertexB:
+ shader_program_manager->UseProgrammableVertexShader(program_handle);
break;
- }
- case Maxwell::ShaderProgram::Geometry: {
- shader_program_manager->UseProgrammableGeometryShader(
- shader->GetProgramHandle(primitive_mode));
+ case Maxwell::ShaderProgram::Geometry:
+ shader_program_manager->UseProgrammableGeometryShader(program_handle);
break;
- }
- case Maxwell::ShaderProgram::Fragment: {
- shader_program_manager->UseProgrammableFragmentShader(
- shader->GetProgramHandle(primitive_mode));
+ case Maxwell::ShaderProgram::Fragment:
+ shader_program_manager->UseProgrammableFragmentShader(program_handle);
break;
- }
default:
LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
shader_config.enable.Value(), shader_config.offset);
UNREACHABLE();
}
- // Configure the const buffers for this shader stage.
- current_constbuffer_bindpoint =
- SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
- current_constbuffer_bindpoint);
-
- // Configure the textures for this shader stage.
- current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
- primitive_mode, current_texture_bindpoint);
+ const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
+ SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
+ SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
+ SetupTextures(stage_enum, shader, program_handle, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -375,6 +364,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
+
+ base_bindings = next_bindings;
}
SyncClipEnabled(clip_distances);
@@ -924,8 +915,9 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
-u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
- GLenum primitive_mode, u32 current_bindpoint) {
+void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader, GLuint program_handle,
+ BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -973,75 +965,73 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
- GLintptr const_buffer_offset = buffer_cache.UploadMemory(
+ const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
- // Now configure the bindpoint of the buffer inside the shader
- glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
- shader->GetProgramResourceIndex(used_buffer),
- current_bindpoint + bindpoint);
-
// Prepare values for multibind
bind_buffers[bindpoint] = buffer_cache.GetHandle();
bind_offsets[bindpoint] = const_buffer_offset;
bind_sizes[bindpoint] = size;
}
- glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()),
+ // The first binding is reserved for emulation values
+ const GLuint ubo_base_binding = base_bindings.cbuf + 1;
+ glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
+}
+
+void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader, GLenum primitive_mode,
+ BaseBindings base_bindings) {
+ // TODO(Rodrigo): Use ARB_multi_bind here
+ const auto& entries = shader->GetShaderEntries().global_memory_entries;
- return current_bindpoint + static_cast<u32>(entries.size());
+ for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
+ const auto& entry = entries[bindpoint];
+ const u32 current_bindpoint = base_bindings.gmem + bindpoint;
+ const auto& region = global_cache.GetGlobalRegion(entry, stage);
+
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
+ }
}
-u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
- GLenum primitive_mode, u32 current_unit) {
+void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
+ GLuint program_handle, BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
- ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
+ ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
"Exceeded the number of active textures.");
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- const u32 current_bindpoint = current_unit + bindpoint;
-
- // Bind the uniform to the sampler.
-
- glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
- shader->GetUniformLocation(entry), current_bindpoint);
+ const u32 current_bindpoint = base_bindings.sampler + bindpoint;
+ auto& unit = state.texture_units[current_bindpoint];
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
-
if (!texture.enabled) {
- state.texture_units[current_bindpoint].texture = 0;
+ unit.texture = 0;
continue;
}
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
+
Surface surface = res_cache.GetTextureSurface(texture, entry);
if (surface != nullptr) {
- const GLuint handle =
+ unit.texture =
entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
- const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
- state.texture_units[current_bindpoint].texture = handle;
- state.texture_units[current_bindpoint].target = target;
- state.texture_units[current_bindpoint].swizzle.r =
- MaxwellToGL::SwizzleSource(texture.tic.x_source);
- state.texture_units[current_bindpoint].swizzle.g =
- MaxwellToGL::SwizzleSource(texture.tic.y_source);
- state.texture_units[current_bindpoint].swizzle.b =
- MaxwellToGL::SwizzleSource(texture.tic.z_source);
- state.texture_units[current_bindpoint].swizzle.a =
- MaxwellToGL::SwizzleSource(texture.tic.w_source);
+ unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
+ unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
+ unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
+ unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
+ unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
} else {
// Can occur when texture addr is null or its memory is unmapped/invalid
- state.texture_units[current_bindpoint].texture = 0;
+ unit.texture = 0;
}
}
-
- return current_unit + static_cast<u32>(entries.size());
}
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 21c51f874..a103692f9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -127,25 +127,18 @@ private:
bool using_depth_fb = true, bool preserve_contents = true,
std::optional<std::size_t> single_color_target = {});
- /**
- * Configures the current constbuffers to use for the draw command.
- * @param stage The shader stage to configure buffers for.
- * @param shader The shader object that contains the specified stage.
- * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
- * @returns The next available bindpoint for use in the next shader stage.
- */
- u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
- GLenum primitive_mode, u32 current_bindpoint);
-
- /**
- * Configures the current textures to use for the draw command.
- * @param stage The shader stage to configure textures for.
- * @param shader The shader object that contains the specified stage.
- * @param current_unit The offset at which to start counting unused texture units.
- * @returns The next available bindpoint for use in the next shader stage.
- */
- u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
- GLenum primitive_mode, u32 current_unit);
+ /// Configures the current constbuffers to use for the draw command.
+ void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
+ GLuint program_handle, BaseBindings base_bindings);
+
+ /// Configures the current global memory entries to use for the draw command.
+ void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader, GLenum primitive_mode,
+ BaseBindings base_bindings);
+
+ /// Configures the current textures to use for the draw command.
+ void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
+ GLuint program_handle, BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b3aca39af..90eda7814 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,36 +34,25 @@ static ProgramCode GetShaderCode(VAddr addr) {
return program_code;
}
-/// Helper function to set shader uniform block bindings for a single shader stage
-static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
- Maxwell::ShaderStage binding, std::size_t expected_size) {
- const GLuint ub_index = glGetUniformBlockIndex(shader, name);
- if (ub_index == GL_INVALID_INDEX) {
- return;
+/// Gets the shader type from a Maxwell program type
+constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
+ switch (program_type) {
+ case Maxwell::ShaderProgram::VertexA:
+ case Maxwell::ShaderProgram::VertexB:
+ return GL_VERTEX_SHADER;
+ case Maxwell::ShaderProgram::Geometry:
+ return GL_GEOMETRY_SHADER;
+ case Maxwell::ShaderProgram::Fragment:
+ return GL_FRAGMENT_SHADER;
+ default:
+ return GL_NONE;
}
-
- GLint ub_size = 0;
- glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
- ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
- "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
- glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
-}
-
-/// Sets shader uniform block bindings for an entire shader program
-static void SetShaderUniformBlockBindings(GLuint shader) {
- SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex,
- sizeof(GLShader::MaxwellUniformData));
- SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry,
- sizeof(GLShader::MaxwellUniformData));
- SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment,
- sizeof(GLShader::MaxwellUniformData));
}
CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
: addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
GLShader::ProgramResult program_result;
- GLenum gl_type{};
switch (program_type) {
case Maxwell::ShaderProgram::VertexA:
@@ -74,17 +63,14 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
case Maxwell::ShaderProgram::VertexB:
CalculateProperties();
program_result = GLShader::GenerateVertexShader(setup);
- gl_type = GL_VERTEX_SHADER;
break;
case Maxwell::ShaderProgram::Geometry:
CalculateProperties();
program_result = GLShader::GenerateGeometryShader(setup);
- gl_type = GL_GEOMETRY_SHADER;
break;
case Maxwell::ShaderProgram::Fragment:
CalculateProperties();
program_result = GLShader::GenerateFragmentShader(setup);
- gl_type = GL_FRAGMENT_SHADER;
break;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
@@ -92,59 +78,105 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
return;
}
+ code = program_result.first;
entries = program_result.second;
shader_length = entries.shader_length;
+}
- if (program_type != Maxwell::ShaderProgram::Geometry) {
- OGLShader shader;
- shader.Create(program_result.first.c_str(), gl_type);
- program.Create(true, shader.handle);
- SetShaderUniformBlockBindings(program.handle);
- LabelGLObject(GL_PROGRAM, program.handle, addr);
+std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
+ BaseBindings base_bindings) {
+ GLuint handle{};
+ if (program_type == Maxwell::ShaderProgram::Geometry) {
+ handle = GetGeometryShader(primitive_mode, base_bindings);
} else {
- // Store shader's code to lazily build it on draw
- geometry_programs.code = program_result.first;
+ const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
+ auto& program = entry->second;
+ if (is_cache_miss) {
+ std::string source = AllocateBindings(base_bindings);
+ source += code;
+
+ OGLShader shader;
+ shader.Create(source.c_str(), GetShaderType(program_type));
+ program.Create(true, shader.handle);
+ LabelGLObject(GL_PROGRAM, program.handle, addr);
+ }
+
+ handle = program.handle;
}
+
+ // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for
+ // emulation values
+ base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
+ base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
+ base_bindings.sampler += static_cast<u32>(entries.samplers.size());
+
+ return {handle, base_bindings};
}
-GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
- const auto search{resource_cache.find(buffer.GetHash())};
- if (search == resource_cache.end()) {
- const GLuint index{
- glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
- resource_cache[buffer.GetHash()] = index;
- return index;
+std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
+ std::string code = "#version 430 core\n";
+ code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
+
+ for (const auto& cbuf : entries.const_buffers) {
+ code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
}
- return search->second;
-}
+ for (const auto& gmem : entries.global_memory_entries) {
+ code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
+ gmem.GetCbufOffset(), base_bindings.gmem++);
+ }
-GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
- const auto search{uniform_cache.find(sampler.GetHash())};
- if (search == uniform_cache.end()) {
- const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
- uniform_cache[sampler.GetHash()] = index;
- return index;
+ for (const auto& sampler : entries.samplers) {
+ code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
+ base_bindings.sampler++);
}
- return search->second;
+ return code;
+}
+
+GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
+ const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
+ auto& programs = entry->second;
+
+ switch (primitive_mode) {
+ case GL_POINTS:
+ return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
+ case GL_LINES:
+ case GL_LINE_STRIP:
+ return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines");
+ case GL_LINES_ADJACENCY:
+ case GL_LINE_STRIP_ADJACENCY:
+ return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4,
+ "ShaderLinesAdjacency");
+ case GL_TRIANGLES:
+ case GL_TRIANGLE_STRIP:
+ case GL_TRIANGLE_FAN:
+ return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3,
+ "ShaderTriangles");
+ case GL_TRIANGLES_ADJACENCY:
+ case GL_TRIANGLE_STRIP_ADJACENCY:
+ return LazyGeometryProgram(programs.triangles_adjacency, base_bindings,
+ "triangles_adjacency", 6, "ShaderTrianglesAdjacency");
+ default:
+ UNREACHABLE_MSG("Unknown primitive mode.");
+ return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
+ }
}
-GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
+GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
const std::string& glsl_topology, u32 max_vertices,
const std::string& debug_name) {
if (target_program.handle != 0) {
return target_program.handle;
}
- std::string source = "#version 430 core\n";
+ std::string source = AllocateBindings(base_bindings);
source += "layout (" + glsl_topology + ") in;\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
- source += geometry_programs.code;
+ source += code;
OGLShader shader;
shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
target_program.Create(true, shader.handle);
- SetShaderUniformBlockBindings(target_program.handle);
LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
return target_program.handle;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index e0887dd7b..904d15dd0 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -7,6 +7,9 @@
#include <array>
#include <map>
#include <memory>
+#include <tuple>
+
+#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_types.h"
@@ -23,6 +26,16 @@ class RasterizerOpenGL;
using Shader = std::shared_ptr<CachedShader>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+struct BaseBindings {
+ u32 cbuf{};
+ u32 gmem{};
+ u32 sampler{};
+
+ bool operator<(const BaseBindings& rhs) const {
+ return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
+ }
+};
+
class CachedShader final : public RasterizerCacheObject {
public:
CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
@@ -44,70 +57,45 @@ public:
}
/// Gets the GL program handle for the shader
- GLuint GetProgramHandle(GLenum primitive_mode) {
- if (program_type != Maxwell::ShaderProgram::Geometry) {
- return program.handle;
- }
- switch (primitive_mode) {
- case GL_POINTS:
- return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
- case GL_LINES:
- case GL_LINE_STRIP:
- return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
- case GL_LINES_ADJACENCY:
- case GL_LINE_STRIP_ADJACENCY:
- return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
- "ShaderLinesAdjacency");
- case GL_TRIANGLES:
- case GL_TRIANGLE_STRIP:
- case GL_TRIANGLE_FAN:
- return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
- "ShaderTriangles");
- case GL_TRIANGLES_ADJACENCY:
- case GL_TRIANGLE_STRIP_ADJACENCY:
- return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
- 6, "ShaderTrianglesAdjacency");
- default:
- UNREACHABLE_MSG("Unknown primitive mode.");
- return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
- }
- }
+ std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
+ BaseBindings base_bindings);
- /// Gets the GL program resource location for the specified resource, caching as needed
- GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
+private:
+ // Geometry programs. These are needed because GLSL needs an input topology but it's not
+ // declared by the hardware. Workaround this issue by generating a different shader per input
+ // topology class.
+ struct GeometryPrograms {
+ OGLProgram points;
+ OGLProgram lines;
+ OGLProgram lines_adjacency;
+ OGLProgram triangles;
+ OGLProgram triangles_adjacency;
+ };
- /// Gets the GL uniform location for the specified resource, caching as needed
- GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
+ std::string AllocateBindings(BaseBindings base_bindings);
+
+ GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
-private:
/// Generates a geometry shader or returns one that already exists.
- GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
- u32 max_vertices, const std::string& debug_name);
+ GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
+ const std::string& glsl_topology, u32 max_vertices,
+ const std::string& debug_name);
void CalculateProperties();
- VAddr addr;
- std::size_t shader_length;
- Maxwell::ShaderProgram program_type;
+ VAddr addr{};
+ std::size_t shader_length{};
+ Maxwell::ShaderProgram program_type{};
GLShader::ShaderSetup setup;
GLShader::ShaderEntries entries;
- // Non-geometry program.
- OGLProgram program;
+ std::string code;
- // Geometry programs. These are needed because GLSL needs an input topology but it's not
- // declared by the hardware. Workaround this issue by generating a different shader per input
- // topology class.
- struct {
- std::string code;
- OGLProgram points;
- OGLProgram lines;
- OGLProgram lines_adjacency;
- OGLProgram triangles;
- OGLProgram triangles_adjacency;
- } geometry_programs;
+ std::map<BaseBindings, OGLProgram> programs;
+ std::map<BaseBindings, GeometryPrograms> geometry_programs;
- std::map<u32, GLuint> resource_cache;
+ std::map<u32, GLuint> cbuf_resource_cache;
+ std::map<u32, GLuint> gmem_resource_cache;
std::map<u32, GLint> uniform_cache;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3411cf9e6..004245431 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -34,6 +34,8 @@ using Operation = const OperationNode&;
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
+constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
+ static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -143,6 +145,7 @@ public:
DeclareInputAttributes();
DeclareOutputAttributes();
DeclareConstantBuffers();
+ DeclareGlobalMemory();
DeclareSamplers();
code.AddLine("void execute_" + suffix + "() {");
@@ -190,12 +193,15 @@ public:
ShaderEntries GetShaderEntries() const {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
- ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first);
- entries.const_buffers.push_back(desc);
+ entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
+ cbuf.first);
}
for (const auto& sampler : ir.GetSamplers()) {
- SamplerEntry desc(sampler, stage, GetSampler(sampler));
- entries.samplers.push_back(desc);
+ entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
+ }
+ for (const auto& gmem : ir.GetGlobalMemoryBases()) {
+ entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
+ GetGlobalMemoryBlock(gmem));
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -368,13 +374,26 @@ private:
void DeclareConstantBuffers() {
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
- code.AddLine("layout (std140) uniform " + GetConstBufferBlock(index) + " {");
+ code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) +
+ ") uniform " + GetConstBufferBlock(index) + " {");
code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];");
code.AddLine("};");
code.AddNewLine();
}
}
+ void DeclareGlobalMemory() {
+ for (const auto& entry : ir.GetGlobalMemoryBases()) {
+ const std::string binding =
+ fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
+ code.AddLine("layout (std430, binding = " + binding + ") buffer " +
+ GetGlobalMemoryBlock(entry) + " {");
+ code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
+ code.AddLine("};");
+ code.AddNewLine();
+ }
+ }
+
void DeclareSamplers() {
const auto& samplers = ir.GetSamplers();
for (const auto& sampler : samplers) {
@@ -398,7 +417,8 @@ private:
if (sampler.IsShadow())
sampler_type += "Shadow";
- code.AddLine("uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
+ code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
+ ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
}
if (!samplers.empty())
code.AddNewLine();
@@ -538,6 +558,12 @@ private:
UNREACHABLE_MSG("Unmanaged offset node type");
}
+ } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+ const std::string real = Visit(gmem->GetRealAddress());
+ const std::string base = Visit(gmem->GetBaseAddress());
+ const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+
} else if (const auto lmem = std::get_if<LmemNode>(node)) {
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
@@ -1471,6 +1497,15 @@ private:
return GetDeclarationWithSuffix(index, "cbuf");
}
+ std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
+ return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
+ }
+
+ std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
+ return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
+ suffix);
+ }
+
std::string GetConstBufferBlock(u32 index) const {
return GetDeclarationWithSuffix(index, "cbuf_block");
}
@@ -1505,8 +1540,10 @@ private:
};
std::string GetCommonDeclarations() {
- return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) +
- "\n"
+ const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
+ const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
+ return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
+ "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 396a560d8..0856a1361 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -38,10 +38,6 @@ public:
return index;
}
- u32 GetHash() const {
- return (static_cast<u32>(stage) << 16) | index;
- }
-
private:
std::string name;
Maxwell::ShaderStage stage{};
@@ -62,18 +58,44 @@ public:
return stage;
}
- u32 GetHash() const {
- return (static_cast<u32>(stage) << 16) | static_cast<u32>(GetIndex());
+private:
+ std::string name;
+ Maxwell::ShaderStage stage{};
+};
+
+class GlobalMemoryEntry {
+public:
+ explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
+ std::string name)
+ : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
+
+ u32 GetCbufIndex() const {
+ return cbuf_index;
+ }
+
+ u32 GetCbufOffset() const {
+ return cbuf_offset;
+ }
+
+ const std::string& GetName() const {
+ return name;
+ }
+
+ Maxwell::ShaderStage GetStage() const {
+ return stage;
}
private:
- std::string name;
+ u32 cbuf_index{};
+ u32 cbuf_offset{};
Maxwell::ShaderStage stage{};
+ std::string name;
};
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
+ std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 446d1a93f..04e1db911 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -20,15 +20,14 @@ static constexpr u32 PROGRAM_OFFSET{10};
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#version 430 core\n";
- out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+ std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: VS" + id + "\n\n";
out += GetCommonDeclarations();
out += R"(
layout (location = 0) out vec4 position;
-layout(std140) uniform vs_config {
+layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
@@ -78,7 +77,6 @@ void main() {
}
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
- // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@@ -89,7 +87,7 @@ ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
layout (location = 0) in vec4 gs_position[];
layout (location = 0) out vec4 position;
-layout (std140) uniform gs_config {
+layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;
@@ -112,8 +110,7 @@ void main() {
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
- std::string out = "#version 430 core\n";
- out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+ std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += "// Shader Unique Id: FS" + id + "\n\n";
out += GetCommonDeclarations();
@@ -129,7 +126,7 @@ layout (location = 7) out vec4 FragColor7;
layout (location = 0) in vec4 position;
-layout (std140) uniform fs_config {
+layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
uvec4 alpha_test;