summaryrefslogtreecommitdiffstats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
authorbunnei <bunneidev@gmail.com>2019-01-30 04:02:01 +0100
committerGitHub <noreply@github.com>2019-01-30 04:02:01 +0100
commit52bb5245268e113bfacc4c4bb27a74ea3058adbc (patch)
tree4ba30f0ad404bd4120dd5b2e4f21193bf03952fd /src/video_core/renderer_opengl
parentvideo_core/GPU Implemented the GPU PFIFO puller semaphore operations. (#1908) (diff)
parentgl_rasterizer: Implement global memory management (diff)
downloadyuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar
yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.gz
yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.bz2
yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.lz
yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.xz
yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.zst
yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.zip
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp70
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp44
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h34
8 files changed, 212 insertions, 10 deletions
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7992b82c4..c7f32feaa 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,8 +4,13 @@
#include <glad/glad.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "core/core.h"
+#include "core/memory.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
@@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{
LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
}
+void CachedGlobalRegion::Reload(u32 size_) {
+ constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
+
+ size = size_;
+ if (size > max_size) {
+ size = max_size;
+ LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
+ max_size);
+ }
+
+ // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
+}
+
+GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
+ const auto search{reserve.find(addr)};
+ if (search == reserve.end()) {
+ return {};
+ }
+ return search->second;
+}
+
+GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
+ GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
+ if (!region) {
+ // No reserved surface available, create a new one and reserve it
+ region = std::make_shared<CachedGlobalRegion>(addr, size);
+ ReserveGlobalRegion(region);
+ }
+ region->Reload(size);
+ return region;
+}
+
+void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
+ reserve[region->GetAddr()] = region;
+}
+
GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
: RasterizerCache{rasterizer} {}
+GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
+ const GLShader::GlobalMemoryEntry& global_region,
+ Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
+
+ auto& gpu{Core::System::GetInstance().GPU()};
+ const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
+ const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
+ cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
+ ASSERT(cbuf_addr);
+
+ const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
+ const auto size = Memory::Read32(*cbuf_addr + 8);
+ const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
+ ASSERT(actual_addr);
+
+ // Look up global region in the cache based on address
+ GlobalRegion region = TryGet(*actual_addr);
+
+ if (!region) {
+ // No global region found - create a new one
+ region = GetUncachedGlobalRegion(*actual_addr, size);
+ Register(region);
+ }
+
+ return region;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 406a735bc..37830bb7c 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -5,9 +5,13 @@
#pragma once
#include <memory>
+#include <unordered_map>
+
#include <glad/glad.h>
+#include "common/assert.h"
#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -40,6 +44,9 @@ public:
return buffer.handle;
}
+ /// Reloads the global region from guest memory
+ void Reload(u32 size_);
+
// TODO(Rodrigo): When global memory is written (STG), implement flushing
void Flush() override {
UNIMPLEMENTED();
@@ -55,6 +62,17 @@ private:
class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
public:
explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
+
+ /// Gets the current specified shader stage program
+ GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
+ Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
+
+private:
+ GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
+ GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
+ void ReserveGlobalRegion(const GlobalRegion& region);
+
+ std::unordered_map<VAddr, GlobalRegion> reserve;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 71829fee0..ca421ef28 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -300,6 +300,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
+ u32 current_gmem_bindpoint = 0;
u32 current_texture_bindpoint = 0;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -358,6 +359,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
current_constbuffer_bindpoint);
+ // Configure global memory regions for this shader stage.
+ current_gmem_bindpoint = SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage),
+ shader, primitive_mode, current_gmem_bindpoint);
+
// Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
primitive_mode, current_texture_bindpoint);
@@ -993,6 +998,23 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
return current_bindpoint + static_cast<u32>(entries.size());
}
+u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
+ GLenum primitive_mode, u32 current_bindpoint) {
+ for (const auto& global_region : shader->GetShaderEntries().global_memory_entries) {
+ const auto& region =
+ global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage));
+ const GLuint block_index{shader->GetProgramResourceIndex(global_region)};
+ ASSERT(block_index != GL_INVALID_INDEX);
+
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
+ glShaderStorageBlockBinding(shader->GetProgramHandle(primitive_mode), block_index,
+ current_bindpoint);
+ ++current_bindpoint;
+ }
+
+ return current_bindpoint;
+}
+
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
GLenum primitive_mode, u32 current_unit) {
MICROPROFILE_SCOPE(OpenGL_Texture);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 21c51f874..57ab2f627 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -138,6 +138,16 @@ private:
GLenum primitive_mode, u32 current_bindpoint);
/**
+ * Configures the current global memory regions to use for the draw command.
+ * @param stage The shader stage to configure buffers for.
+ * @param shader The shader object that contains the specified stage.
+ * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
+ * @returns The next available bindpoint for use in the next shader stage.
+ */
+ u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
+ GLenum primitive_mode, u32 current_bindpoint);
+
+ /**
* Configures the current textures to use for the draw command.
* @param stage The shader stage to configure textures for.
* @param shader The shader object that contains the specified stage.
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b3aca39af..54ec23f3a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -108,11 +108,23 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
}
GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
- const auto search{resource_cache.find(buffer.GetHash())};
- if (search == resource_cache.end()) {
+ const auto search{cbuf_resource_cache.find(buffer.GetHash())};
+ if (search == cbuf_resource_cache.end()) {
const GLuint index{
glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
- resource_cache[buffer.GetHash()] = index;
+ cbuf_resource_cache[buffer.GetHash()] = index;
+ return index;
+ }
+
+ return search->second;
+}
+
+GLuint CachedShader::GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem) {
+ const auto search{gmem_resource_cache.find(global_mem.GetHash())};
+ if (search == gmem_resource_cache.end()) {
+ const GLuint index{glGetProgramResourceIndex(program.handle, GL_SHADER_STORAGE_BLOCK,
+ global_mem.GetName().c_str())};
+ gmem_resource_cache[global_mem.GetHash()] = index;
return index;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index e0887dd7b..62b1733b4 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -76,6 +76,9 @@ public:
/// Gets the GL program resource location for the specified resource, caching as needed
GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
+ /// Gets the GL program resource location for the specified resource, caching as needed
+ GLuint GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem);
+
/// Gets the GL uniform location for the specified resource, caching as needed
GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
@@ -107,7 +110,8 @@ private:
OGLProgram triangles_adjacency;
} geometry_programs;
- std::map<u32, GLuint> resource_cache;
+ std::map<u32, GLuint> cbuf_resource_cache;
+ std::map<u32, GLuint> gmem_resource_cache;
std::map<u32, GLint> uniform_cache;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3411cf9e6..e072216f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -34,6 +34,8 @@ using Operation = const OperationNode&;
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
+constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
+ static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -143,6 +145,7 @@ public:
DeclareInputAttributes();
DeclareOutputAttributes();
DeclareConstantBuffers();
+ DeclareGlobalMemory();
DeclareSamplers();
code.AddLine("void execute_" + suffix + "() {");
@@ -190,12 +193,15 @@ public:
ShaderEntries GetShaderEntries() const {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
- ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first);
- entries.const_buffers.push_back(desc);
+ entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
+ cbuf.first);
}
for (const auto& sampler : ir.GetSamplers()) {
- SamplerEntry desc(sampler, stage, GetSampler(sampler));
- entries.samplers.push_back(desc);
+ entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
+ }
+ for (const auto& gmem : ir.GetGlobalMemoryBases()) {
+ entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
+ GetGlobalMemoryBlock(gmem));
}
entries.clip_distances = ir.GetClipDistances();
entries.shader_length = ir.GetLength();
@@ -375,6 +381,15 @@ private:
}
}
+ void DeclareGlobalMemory() {
+ for (const auto& entry : ir.GetGlobalMemoryBases()) {
+ code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {");
+ code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
+ code.AddLine("};");
+ code.AddNewLine();
+ }
+ }
+
void DeclareSamplers() {
const auto& samplers = ir.GetSamplers();
for (const auto& sampler : samplers) {
@@ -538,6 +553,12 @@ private:
UNREACHABLE_MSG("Unmanaged offset node type");
}
+ } else if (const auto gmem = std::get_if<GmemNode>(node)) {
+ const std::string real = Visit(gmem->GetRealAddress());
+ const std::string base = Visit(gmem->GetBaseAddress());
+ const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
+ return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+
} else if (const auto lmem = std::get_if<LmemNode>(node)) {
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
@@ -1471,6 +1492,15 @@ private:
return GetDeclarationWithSuffix(index, "cbuf");
}
+ std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
+ return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
+ }
+
+ std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
+ return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
+ suffix);
+ }
+
std::string GetConstBufferBlock(u32 index) const {
return GetDeclarationWithSuffix(index, "cbuf_block");
}
@@ -1505,8 +1535,10 @@ private:
};
std::string GetCommonDeclarations() {
- return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) +
- "\n"
+ const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
+ const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
+ return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
+ "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 396a560d8..e47bc3729 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -71,9 +71,43 @@ private:
Maxwell::ShaderStage stage{};
};
+class GlobalMemoryEntry {
+public:
+ explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
+ std::string name)
+ : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
+
+ u32 GetCbufIndex() const {
+ return cbuf_index;
+ }
+
+ u32 GetCbufOffset() const {
+ return cbuf_offset;
+ }
+
+ const std::string& GetName() const {
+ return name;
+ }
+
+ Maxwell::ShaderStage GetStage() const {
+ return stage;
+ }
+
+ u32 GetHash() const {
+ return (static_cast<u32>(stage) << 24) | (cbuf_index << 16) | cbuf_offset;
+ }
+
+private:
+ u32 cbuf_index{};
+ u32 cbuf_offset{};
+ Maxwell::ShaderStage stage{};
+ std::string name;
+};
+
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
+ std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
};