diff options
author | bunnei <bunneidev@gmail.com> | 2019-01-30 04:02:01 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-30 04:02:01 +0100 |
commit | 52bb5245268e113bfacc4c4bb27a74ea3058adbc (patch) | |
tree | 4ba30f0ad404bd4120dd5b2e4f21193bf03952fd /src/video_core/renderer_opengl | |
parent | video_core/GPU Implemented the GPU PFIFO puller semaphore operations. (#1908) (diff) | |
parent | gl_rasterizer: Implement global memory management (diff) | |
download | yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.gz yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.bz2 yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.lz yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.xz yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.tar.zst yuzu-52bb5245268e113bfacc4c4bb27a74ea3058adbc.zip |
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 70 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.h | 18 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 22 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 18 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 44 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 34 |
8 files changed, 212 insertions, 10 deletions
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 7992b82c4..c7f32feaa 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -4,8 +4,13 @@ #include <glad/glad.h> +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "core/memory.h" #include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { @@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{ LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); } +void CachedGlobalRegion::Reload(u32 size_) { + constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); + + size = size_; + if (size > max_size) { + size = max_size; + LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, + max_size); + } + + // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer + glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); + glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); +} + +GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { + const auto search{reserve.find(addr)}; + if (search == reserve.end()) { + return {}; + } + return search->second; +} + +GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { + GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; + if (!region) { + // No reserved surface available, create a new one and reserve it + region = std::make_shared<CachedGlobalRegion>(addr, size); + ReserveGlobalRegion(region); + } + region->Reload(size); + return region; +} + +void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { + reserve[region->GetAddr()] = region; +} + GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} +GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( + const GLShader::GlobalMemoryEntry& global_region, + Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { + + auto& gpu{Core::System::GetInstance().GPU()}; + const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; + const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( + cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); + ASSERT(cbuf_addr); + + const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); + const auto size = Memory::Read32(*cbuf_addr + 8); + const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu); + ASSERT(actual_addr); + + // Look up global region in the cache based on address + GlobalRegion region = TryGet(*actual_addr); + + if (!region) { + // No global region found - create a new one + region = GetUncachedGlobalRegion(*actual_addr, size); + Register(region); + } + + return region; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 406a735bc..37830bb7c 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -5,9 +5,13 @@ #pragma once #include <memory> +#include <unordered_map> + #include <glad/glad.h> +#include "common/assert.h" #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -40,6 +44,9 @@ public: return buffer.handle; } + /// Reloads the global region from guest memory + void Reload(u32 size_); + // TODO(Rodrigo): When global memory is written (STG), implement flushing void Flush() override { UNIMPLEMENTED(); @@ -55,6 +62,17 @@ private: class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { public: explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); + + /// Gets the current specified shader stage program + GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, + Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); + +private: + GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; + GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); + void ReserveGlobalRegion(const GlobalRegion& region); + + std::unordered_map<VAddr, GlobalRegion> reserve; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 71829fee0..ca421ef28 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -300,6 +300,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; + u32 current_gmem_bindpoint = 0; u32 current_texture_bindpoint = 0; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; @@ -358,6 +359,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, current_constbuffer_bindpoint); + // Configure global memory regions for this shader stage. + current_gmem_bindpoint = SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage), + shader, primitive_mode, current_gmem_bindpoint); + // Configure the textures for this shader stage. current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, current_texture_bindpoint); @@ -993,6 +998,23 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad return current_bindpoint + static_cast<u32>(entries.size()); } +u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader, + GLenum primitive_mode, u32 current_bindpoint) { + for (const auto& global_region : shader->GetShaderEntries().global_memory_entries) { + const auto& region = + global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage)); + const GLuint block_index{shader->GetProgramResourceIndex(global_region)}; + ASSERT(block_index != GL_INVALID_INDEX); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); + glShaderStorageBlockBinding(shader->GetProgramHandle(primitive_mode), block_index, + current_bindpoint); + ++current_bindpoint; + } + + return current_bindpoint; +} + u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, GLenum primitive_mode, u32 current_unit) { MICROPROFILE_SCOPE(OpenGL_Texture); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 21c51f874..57ab2f627 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -138,6 +138,16 @@ private: GLenum primitive_mode, u32 current_bindpoint); /** + * Configures the current global memory regions to use for the draw command. + * @param stage The shader stage to configure buffers for. + * @param shader The shader object that contains the specified stage. + * @param current_bindpoint The offset at which to start counting new buffer bindpoints. + * @returns The next available bindpoint for use in the next shader stage. + */ + u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, + GLenum primitive_mode, u32 current_bindpoint); + + /** * Configures the current textures to use for the draw command. * @param stage The shader stage to configure textures for. * @param shader The shader object that contains the specified stage. diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b3aca39af..54ec23f3a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -108,11 +108,23 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) } GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { - const auto search{resource_cache.find(buffer.GetHash())}; - if (search == resource_cache.end()) { + const auto search{cbuf_resource_cache.find(buffer.GetHash())}; + if (search == cbuf_resource_cache.end()) { const GLuint index{ glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; - resource_cache[buffer.GetHash()] = index; + cbuf_resource_cache[buffer.GetHash()] = index; + return index; + } + + return search->second; +} + +GLuint CachedShader::GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem) { + const auto search{gmem_resource_cache.find(global_mem.GetHash())}; + if (search == gmem_resource_cache.end()) { + const GLuint index{glGetProgramResourceIndex(program.handle, GL_SHADER_STORAGE_BLOCK, + global_mem.GetName().c_str())}; + gmem_resource_cache[global_mem.GetHash()] = index; return index; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0887dd7b..62b1733b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -76,6 +76,9 @@ public: /// Gets the GL program resource location for the specified resource, caching as needed GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); + /// Gets the GL program resource location for the specified resource, caching as needed + GLuint GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem); + /// Gets the GL uniform location for the specified resource, caching as needed GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); @@ -107,7 +110,8 @@ private: OGLProgram triangles_adjacency; } geometry_programs; - std::map<u32, GLuint> resource_cache; + std::map<u32, GLuint> cbuf_resource_cache; + std::map<u32, GLuint> gmem_resource_cache; std::map<u32, GLint> uniform_cache; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3411cf9e6..e072216f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -34,6 +34,8 @@ using Operation = const OperationNode&; enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); +constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = + static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; @@ -143,6 +145,7 @@ public: DeclareInputAttributes(); DeclareOutputAttributes(); DeclareConstantBuffers(); + DeclareGlobalMemory(); DeclareSamplers(); code.AddLine("void execute_" + suffix + "() {"); @@ -190,12 +193,15 @@ public: ShaderEntries GetShaderEntries() const { ShaderEntries entries; for (const auto& cbuf : ir.GetConstantBuffers()) { - ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first); - entries.const_buffers.push_back(desc); + entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first), + cbuf.first); } for (const auto& sampler : ir.GetSamplers()) { - SamplerEntry desc(sampler, stage, GetSampler(sampler)); - entries.samplers.push_back(desc); + entries.samplers.emplace_back(sampler, stage, GetSampler(sampler)); + } + for (const auto& gmem : ir.GetGlobalMemoryBases()) { + entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage, + GetGlobalMemoryBlock(gmem)); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -375,6 +381,15 @@ private: } } + void DeclareGlobalMemory() { + for (const auto& entry : ir.GetGlobalMemoryBases()) { + code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {"); + code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); + code.AddLine("};"); + code.AddNewLine(); + } + } + void DeclareSamplers() { const auto& samplers = ir.GetSamplers(); for (const auto& sampler : samplers) { @@ -538,6 +553,12 @@ private: UNREACHABLE_MSG("Unmanaged offset node type"); } + } else if (const auto gmem = std::get_if<GmemNode>(node)) { + const std::string real = Visit(gmem->GetRealAddress()); + const std::string base = Visit(gmem->GetBaseAddress()); + const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; + return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); + } else if (const auto lmem = std::get_if<LmemNode>(node)) { return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); @@ -1471,6 +1492,15 @@ private: return GetDeclarationWithSuffix(index, "cbuf"); } + std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { + return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); + } + + std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { + return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, + suffix); + } + std::string GetConstBufferBlock(u32 index) const { return GetDeclarationWithSuffix(index, "cbuf_block"); } @@ -1505,8 +1535,10 @@ private: }; std::string GetCommonDeclarations() { - return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) + - "\n" + const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); + const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); + return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + + "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + "#define ftoi floatBitsToInt\n" "#define ftou floatBitsToUint\n" "#define itof intBitsToFloat\n" diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 396a560d8..e47bc3729 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -71,9 +71,43 @@ private: Maxwell::ShaderStage stage{}; }; +class GlobalMemoryEntry { +public: + explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage, + std::string name) + : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {} + + u32 GetCbufIndex() const { + return cbuf_index; + } + + u32 GetCbufOffset() const { + return cbuf_offset; + } + + const std::string& GetName() const { + return name; + } + + Maxwell::ShaderStage GetStage() const { + return stage; + } + + u32 GetHash() const { + return (static_cast<u32>(stage) << 24) | (cbuf_index << 16) | cbuf_offset; + } + +private: + u32 cbuf_index{}; + u32 cbuf_offset{}; + Maxwell::ShaderStage stage{}; + std::string name; +}; + struct ShaderEntries { std::vector<ConstBufferEntry> const_buffers; std::vector<SamplerEntry> samplers; + std::vector<GlobalMemoryEntry> global_memory_entries; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::size_t shader_length{}; }; |