diff options
author | Ameer <aj662@drexel.edu> | 2020-07-04 06:59:40 +0200 |
---|---|---|
committer | Ameer <aj662@drexel.edu> | 2020-07-04 06:59:40 +0200 |
commit | f829932ed191ad469df01342191bf2725e8a20bb (patch) | |
tree | 0ae185ce3ef43ef9b085aae7b9ad5abb04e3d239 /src/video_core/renderer_opengl | |
parent | Fix for always firing triggers on some controllers, trigger threshold more universal (diff) | |
parent | Merge pull request #4218 from ogniK5377/opus-external (diff) | |
download | yuzu-f829932ed191ad469df01342191bf2725e8a20bb.tar yuzu-f829932ed191ad469df01342191bf2725e8a20bb.tar.gz yuzu-f829932ed191ad469df01342191bf2725e8a20bb.tar.bz2 yuzu-f829932ed191ad469df01342191bf2725e8a20bb.tar.lz yuzu-f829932ed191ad469df01342191bf2725e8a20bb.tar.xz yuzu-f829932ed191ad469df01342191bf2725e8a20bb.tar.zst yuzu-f829932ed191ad469df01342191bf2725e8a20bb.zip |
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 69 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 37 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 75 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 50 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.cpp | 56 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.h | 16 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 44 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 17 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 3 |
13 files changed, 236 insertions, 166 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..e461e4c70 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -22,21 +22,53 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { +Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); + } } Buffer::~Buffer() = default; +void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { + glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), + data); +} + +void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { + MICROPROFILE_SCOPE(OpenGL_Buffer_Download); + const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); + const GLintptr gl_offset = static_cast<GLintptr>(offset); + if (read_buffer.handle == 0) { + read_buffer.Create(); + glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, + GL_STREAM_READ); + } + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); + glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); +} + +void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size) { + glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +} + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, - const Device& device, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { + const Device& device_, std::size_t stream_size) + : GenericBufferCache{rasterizer, system, + std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, + device{device_} { if (!device.HasFastBufferSubData()) { return; } - static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); + static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); for (const GLuint cbuf : cbufs) { glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); @@ -48,39 +80,20 @@ OGLBufferCache::~OGLBufferCache() { } std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { - return std::make_shared<Buffer>(cpu_addr, size); + return std::make_shared<Buffer>(device, cpu_addr, size); } -GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { - return 0; -} - -void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { - glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) { - MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), - static_cast<GLsizeiptr>(size), data); -} - -void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset), - static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); +OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { + return {0, 0, 0}; } OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, std::size_t size) { DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint cbuf = cbufs[cbuf_cursor++]; + glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); - return {cbuf, 0}; + return {cbuf, 0, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..88fdc0536 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -25,15 +25,28 @@ class RasterizerOpenGL; class Buffer : public VideoCommon::BufferBlock { public: - explicit Buffer(VAddr cpu_addr, const std::size_t size); + explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); ~Buffer(); - GLuint Handle() const { + void Upload(std::size_t offset, std::size_t size, const u8* data); + + void Download(std::size_t offset, std::size_t size, u8* data); + + void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, + std::size_t size); + + GLuint Handle() const noexcept { return gl_buffer.handle; } + u64 Address() const noexcept { + return gpu_address; + } + private: OGLBuffer gl_buffer; + OGLBuffer read_buffer; + u64 gpu_address = 0; }; using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; @@ -43,7 +56,7 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - GLuint GetEmptyBuffer(std::size_t) override; + BufferInfo GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; @@ -52,22 +65,16 @@ public: protected: std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; - - void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; - - void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; - BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; private: + static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * + Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; + + const Device& device; + std::size_t cbuf_cursor = 0; - std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * - Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> - cbufs; + std::array<GLuint, NUM_CBUFS> cbufs{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b31d604e4..208fc6167 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -178,7 +178,7 @@ bool IsASTCSupported() { for (const GLenum format : formats) { for (const GLenum support : required_support) { GLint value; - glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); + glGetInternalformativ(target, format, support, 1, &value); if (value != GL_FULL_SUPPORT) { return false; } @@ -193,6 +193,7 @@ bool IsASTCSupported() { Device::Device() : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); + const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); @@ -216,12 +217,18 @@ Device::Device() has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); + has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod"); has_astc = IsASTCSupported(); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); - has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; + + // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive + // uniform buffers as "push constants" + has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; + use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; @@ -245,6 +252,7 @@ Device::Device(std::nullptr_t) { has_shader_ballot = true; has_vertex_viewport_layer = true; has_image_load_formatted = true; + has_texture_shadow_lod = true; has_variable_aoffi = true; } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 145347943..e1d811966 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -68,6 +68,14 @@ public: return has_image_load_formatted; } + bool HasTextureShadowLod() const { + return has_texture_shadow_lod; + } + + bool HasVertexBufferUnifiedMemory() const { + return has_vertex_buffer_unified_memory; + } + bool HasASTC() const { return has_astc; } @@ -110,6 +118,8 @@ private: bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; + bool has_texture_shadow_lod{}; + bool has_vertex_buffer_unified_memory{}; bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..e960a0ef1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; -constexpr std::size_t NumSupportedVertexAttributes = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; +constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, @@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() { // avoid OpenGL errors. // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't // assume every shader uses them all. - for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexFormat0 + index]) { continue; } @@ -212,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() { if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { glVertexAttribIFormat(gl_index, attrib.ComponentCount(), - MaxwellToGL::VertexType(attrib), attrib.offset); + MaxwellToGL::VertexFormat(attrib), attrib.offset); } else { - glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + glVertexAttribFormat(gl_index, attrib.ComponentCount(), + MaxwellToGL::VertexFormat(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); } glVertexAttribBinding(gl_index, attrib.buffer); @@ -231,9 +233,11 @@ void RasterizerOpenGL::SetupVertexBuffer() { MICROPROFILE_SCOPE(OpenGL_VB); + const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); + // Upload all guest vertex arrays sequentially to our buffer const auto& regs = gpu.regs; - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { if (!flags[Dirty::VertexBuffer0 + index]) { continue; } @@ -246,16 +250,25 @@ void RasterizerOpenGL::SetupVertexBuffer() { const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); - ASSERT(end >= start); + + const GLuint gl_index = static_cast<GLuint>(index); const u64 size = end - start; if (size == 0) { - glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + if (use_unified_memory) { + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); + } continue; } - const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, - vertex_array.stride); + const auto info = buffer_cache.UploadMemory(start, size); + if (use_unified_memory) { + glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, + info.address + info.offset, size); + } else { + glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); + } } } @@ -268,7 +281,7 @@ void RasterizerOpenGL::SetupVertexInstances() { flags[Dirty::VertexInstances] = false; const auto& regs = gpu.regs; - for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexInstance0 + index]) { continue; } @@ -285,9 +298,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); - const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); - return offset; + const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); + return info.offset; } void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { @@ -643,9 +656,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; ubo.SetFromRegs(gpu); - const auto [buffer, offset] = + const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, static_cast<GLsizeiptr>(sizeof(ubo))); } @@ -956,8 +969,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, if (device.UseAssemblyShaders()) { glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, - buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); } return; } @@ -970,24 +982,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); const GPUVAddr gpu_addr = buffer.address; - auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); + auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); if (device.UseAssemblyShaders()) { UNIMPLEMENTED_IF(use_unified); - if (offset != 0) { + if (info.offset != 0) { const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; - glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); - cbuf = staging_cbuf; - offset = 0; + glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); + info.handle = staging_cbuf; + info.offset = 0; } - glBindBufferRangeNV(stage, binding, cbuf, offset, size); + glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); return; } if (use_unified) { - glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); + glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, + unified_offset, size); } else { - glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); } } @@ -1023,9 +1036,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; - const auto [ssbo, buffer_offset] = - buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, static_cast<GLsizeiptr>(size)); } @@ -1712,8 +1724,9 @@ void RasterizerOpenGL::EndTransformFeedback() { const GLuint handle = transform_feedback_buffers[index].handle; const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; - const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, + static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 46e780a06..c6a3bf3a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -460,8 +460,9 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const u8* host_ptr_b = memory_manager.GetPointer(address_b); code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); } + const std::size_t code_size = code.size() * sizeof(u64); - const auto unique_identifier = GetUniqueIdentifier( + const u64 unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); const ShaderParameters params{system, disk_cache, device, @@ -477,7 +478,7 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { Shader* const result = shader.get(); if (cpu_addr) { - Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64)); + Register(std::move(shader), *cpu_addr, code_size); } else { null_shader = std::move(shader); } @@ -495,8 +496,9 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one - auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; - const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; + ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; + const std::size_t code_size{code.size() * sizeof(u64)}; + const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const ShaderParameters params{system, disk_cache, device, *cpu_addr, host_ptr, unique_identifier}; @@ -511,7 +513,7 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { Shader* const result = kernel.get(); if (cpu_addr) { - Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64)); + Register(std::move(kernel), *cpu_addr, code_size); } else { null_kernel = std::move(kernel); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6848f1388..994aaeaf2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -37,7 +37,6 @@ namespace OpenGL { class Device; class RasterizerOpenGL; -struct UnspecializedShader; using Maxwell = Tegra::Engines::Maxwell3D::Regs; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d6e30b321..2c49aeaac 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::PixelImap; using Tegra::Shader::Register; +using Tegra::Shader::TextureType; using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; @@ -526,6 +527,9 @@ private: if (device.HasImageLoadFormatted()) { code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); } + if (device.HasTextureShadowLod()) { + code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); + } if (device.HasWarpIntrinsics()) { code.AddLine("#extension GL_NV_gpu_shader5 : require"); code.AddLine("#extension GL_NV_shader_thread_group : require"); @@ -909,13 +913,13 @@ private: return "samplerBuffer"; } switch (sampler.type) { - case Tegra::Shader::TextureType::Texture1D: + case TextureType::Texture1D: return "sampler1D"; - case Tegra::Shader::TextureType::Texture2D: + case TextureType::Texture2D: return "sampler2D"; - case Tegra::Shader::TextureType::Texture3D: + case TextureType::Texture3D: return "sampler3D"; - case Tegra::Shader::TextureType::TextureCube: + case TextureType::TextureCube: return "samplerCube"; default: UNREACHABLE(); @@ -1380,8 +1384,19 @@ private: const std::size_t count = operation.GetOperandsCount(); const bool has_array = meta->sampler.is_array; const bool has_shadow = meta->sampler.is_shadow; + const bool workaround_lod_array_shadow_as_grad = + !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube); + + std::string expr = "texture"; + + if (workaround_lod_array_shadow_as_grad) { + expr += "Grad"; + } else { + expr += function_suffix; + } - std::string expr = "texture" + function_suffix; if (!meta->aoffi.empty()) { expr += "Offset"; } else if (!meta->ptp.empty()) { @@ -1415,6 +1430,16 @@ private: expr += ')'; } + if (workaround_lod_array_shadow_as_grad) { + switch (meta->sampler.type) { + case TextureType::Texture2D: + return expr + ", vec2(0.0), vec2(0.0))"; + case TextureType::TextureCube: + return expr + ", vec3(0.0), vec3(0.0))"; + } + UNREACHABLE(); + } + for (const auto& variant : extras) { if (const auto argument = std::get_if<TextureArgument>(&variant)) { expr += GenerateTextureArgument(*argument); @@ -2041,8 +2066,19 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture( - operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + std::string expr{}; + + if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && + ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || + meta->sampler.type == TextureType::TextureCube)) { + LOG_ERROR(Render_OpenGL, + "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); + expr = GenerateTexture(operation, "Lod", {}); + } else { + expr = GenerateTexture(operation, "Lod", + {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); + } + if (meta->sampler.is_shadow) { expr = "vec4(" + expr + ')'; } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 932a2f69e..3655ff629 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -2,11 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <deque> +#include <tuple> #include <vector> + #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", @@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, - bool use_persistent) +OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) : buffer_size(size) { gl_buffer.Create(); @@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p allocate_size *= 2; } - if (use_persistent) { - persistent = true; - coherent = prefer_coherent; - const GLbitfield flags = - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); - glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); - mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( - gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); - } else { - glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); + static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; + glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); + mapped_ptr = static_cast<u8*>( + glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); + + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); } } OGLStreamBuffer::~OGLStreamBuffer() { - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } + glUnmapNamedBuffer(gl_buffer.handle); gl_buffer.Release(); } @@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a bool invalidate = false; if (buffer_pos + size > buffer_size) { + MICROPROFILE_SCOPE(OpenGL_StreamBuffer); + glInvalidateBufferData(gl_buffer.handle); + buffer_pos = 0; invalidate = true; - - if (persistent) { - glUnmapNamedBuffer(gl_buffer.handle); - } } - if (invalidate || !persistent) { - MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | - (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | - (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); - mapped_ptr = static_cast<u8*>( - glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); - mapped_offset = buffer_pos; - } - - return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); + return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); } void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); - if (!coherent && size > 0) { - glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); - } - - if (!persistent) { - glUnmapNamedBuffer(gl_buffer.handle); + if (size > 0) { + glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); } buffer_pos += size; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 866da3594..307a67113 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -11,10 +11,11 @@ namespace OpenGL { +class Device; + class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, - bool use_persistent = true); + explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); ~OGLStreamBuffer(); /* @@ -33,19 +34,20 @@ public: return gl_buffer.handle; } - GLsizeiptr Size() const { + u64 Address() const { + return gpu_address; + } + + GLsizeiptr Size() const noexcept { return buffer_size; } private: OGLBuffer gl_buffer; - bool coherent = false; - bool persistent = false; - + GLuint64EXT gpu_address = 0; GLintptr buffer_pos = 0; GLsizeiptr buffer_size = 0; - GLintptr mapped_offset = 0; GLsizeiptr mapped_size = 0; u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 35e329240..fe9bd4b5a 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,10 +24,11 @@ namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -inline GLenum VertexType(Maxwell::VertexAttribute attrib) { +inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { - case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::UnsignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT_2_10_10_10_REV; } break; - case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -84,36 +86,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_UNSIGNED_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_UNSIGNED_SHORT; - } - break; - case Maxwell::VertexAttribute::Type::SignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_SHORT; - } - break; } - UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(), attrib.SizeString()); return {}; } @@ -217,6 +191,12 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { } else { return GL_MIRROR_CLAMP_TO_EDGE; } + case Tegra::Texture::WrapMode::MirrorOnceClampOGL: + if (GL_EXT_texture_mirror_clamp) { + return GL_MIRROR_CLAMP_EXT; + } else { + return GL_MIRROR_CLAMP_TO_EDGE; + } } UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); return GL_REPEAT; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6214fcbc3..c40adb6e7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() { // Clear screen to black LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); + + // Enable unified vertex attributes and query vertex buffer address when the driver supports it + if (device.HasVertexBufferUnifiedMemory()) { + glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); + + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } } void RendererOpenGL::AddTelemetryFields() { @@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { offsetof(ScreenRectVertex, tex_coord)); glVertexAttribBinding(PositionLocation, 0); glVertexAttribBinding(TexCoordLocation, 0); - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(vertices)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } glBindTextureUnit(0, screen_info.display_texture); glBindSampler(0, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 61bf507f4..8b18d32e6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -107,6 +107,9 @@ private: OGLPipeline pipeline; OGLFramebuffer screenshot_framebuffer; + // GPU address of the vertex buffer + GLuint64EXT vertex_buffer_address = 0; + /// Display information for Switch screen ScreenInfo screen_info; |