diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 26 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 42 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_compute_pipeline.h | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 18 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 19 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.h | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 13 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 30 |
11 files changed, 120 insertions, 67 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2d0ef1307..334ed470f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + const GLuint base_binding = graphics_base_storage_bindings[stage]; + const GLuint binding = base_binding + binding_index; + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast<GLsizei>(size), @@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, reinterpret_cast<const GLuint*>(&ssbo)); - } else { - const GLuint base_binding = graphics_base_storage_bindings[stage]; - const GLuint binding = base_binding + binding_index; - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), - static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } } void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, bool is_written) { - if (use_assembly_shaders) { + if (use_storage_buffers) { + if (size != 0) { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), + static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); + } else { + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); + } + } else { const BindlessSSBO ssbo{ .address = buffer.HostGpuAddr() + offset, .length = static_cast<GLsizei>(size), @@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, reinterpret_cast<const GLuint*>(&ssbo)); - } else if (size == 0) { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); - } else { - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), - static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4986c65fd..bc16abafb 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -147,6 +147,10 @@ public: image_handles = image_handles_; } + void SetEnableStorageBuffers(bool use_storage_buffers_) { + use_storage_buffers = use_storage_buffers_; + } + private: static constexpr std::array PABO_LUT{ GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, @@ -160,6 +164,8 @@ private: bool use_assembly_shaders = false; bool has_unified_vertex_buffers = false; + bool use_storage_buffers = false; + u32 max_attributes = 0; std::array<GLuint, 5> graphics_base_uniform_bindings{}; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 700ebd8b8..5cf5f97a9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -17,6 +17,15 @@ using VideoCommon::ImageId; constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 16; +template <typename Range> +u32 AccumulateCount(const Range& range) { + u32 num{}; + for (const auto& desc : range) { + num += desc.count; + } + return num; +} + size_t ComputePipelineKey::Hash() const noexcept { return static_cast<size_t>( Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); @@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep return std::memcmp(this, &rhs, sizeof *this) == 0; } -ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { - for (const auto& desc : info.texture_buffer_descriptors) { - num_texture_buffers += desc.count; - } - for (const auto& desc : info.image_buffer_descriptors) { - num_image_buffers += desc.count; - } - u32 num_textures = num_texture_buffers; - for (const auto& desc : info.texture_descriptors) { - num_textures += desc.count; - } + + num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); + num_image_buffers = AccumulateCount(info.image_buffer_descriptors); + + const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; ASSERT(num_textures <= MAX_TEXTURES); - u32 num_images = num_image_buffers; - for (const auto& desc : info.image_descriptors) { - num_images += desc.count; - } + const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; ASSERT(num_images <= MAX_IMAGES); + + const bool is_glasm{assembly_program.handle != 0}; + const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; + use_storage_buffers = + !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory = !use_storage_buffers && + std::ranges::any_of(info.storage_buffers_descriptors, + [](const auto& desc) { return desc.is_written; }); } void ComputePipeline::Configure() { @@ -150,6 +159,7 @@ void ComputePipeline::Configure() { buffer_cache.UpdateComputeBuffers(); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index e3b94e2f3..dd6b62ef2 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -28,6 +28,7 @@ struct Info; namespace OpenGL { +class Device; class ProgramManager; struct ComputePipelineKey { @@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineKey>); class ComputePipeline { public: - explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::KeplerCompute& kepler_compute_, ProgramManager& program_manager_, const Shader::Info& info_, OGLProgram source_program_, OGLAssemblyProgram assembly_program_); void Configure(); + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: TextureCache& texture_cache; BufferCache& buffer_cache; @@ -70,6 +75,9 @@ private: u32 num_texture_buffers{}; u32 num_image_buffers{}; + + bool use_storage_buffers{}; + bool writes_global_memory{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 18bbc4c1f..01da2bb57 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -135,13 +135,13 @@ Device::Device() { "Beta driver 443.24 is known to have issues. There might be performance issues."); disable_fast_buffer_sub_data = true; } - max_uniform_buffers = BuildMaxUniformBuffers(); uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); + max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_shader_ballot = GLAD_GL_ARB_shader_ballot; @@ -236,22 +236,6 @@ std::string Device::GetVendorName() const { return vendor_name; } -Device::Device(std::nullptr_t) { - max_uniform_buffers.fill(std::numeric_limits<u32>::max()); - uniform_buffer_alignment = 4; - shader_storage_alignment = 4; - max_vertex_attributes = 16; - max_varyings = 15; - max_compute_shared_memory_size = 0x10000; - has_warp_intrinsics = true; - has_shader_ballot = true; - has_vertex_viewport_layer = true; - has_image_load_formatted = true; - has_texture_shadow_lod = true; - has_variable_aoffi = true; - has_depth_buffer_float = true; -} - bool Device::TestVariableAoffi() { return TestProgram(R"(#version 430 core // This is a unit test, please ignore me on apitrace bug reports. diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 152a3acd3..d67f5693c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -13,7 +13,6 @@ namespace OpenGL { class Device { public: explicit Device(); - explicit Device(std::nullptr_t); [[nodiscard]] std::string GetVendorName() const; @@ -41,6 +40,10 @@ public: return max_compute_shared_memory_size; } + u32 GetMaxGLASMStorageBufferBlocks() const { + return max_glasm_storage_buffer_blocks; + } + bool HasWarpIntrinsics() const { return has_warp_intrinsics; } @@ -124,6 +127,7 @@ private: u32 max_vertex_attributes{}; u32 max_varyings{}; u32 max_compute_shared_memory_size{}; + u32 max_glasm_storage_buffer_blocks{}; bool has_warp_intrinsics{}; bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 32df35202..19d85c482 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64; constexpr u32 MAX_IMAGES = 8; template <typename Range> -u32 AccumulateCount(Range&& range) { +u32 AccumulateCount(const Range& range) { u32 num{}; for (const auto& desc : range) { num += desc.count; @@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc return std::memcmp(this, &rhs, Size()) == 0; } -GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, +GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu } u32 num_textures{}; u32 num_images{}; + u32 num_storage_buffers{}; for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { const auto& info{stage_infos[stage]}; if (stage < 4) { @@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu num_textures += AccumulateCount(info.texture_descriptors); num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); - if (assembly_programs[0].handle != 0 && xfb_state) { + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + + if (assembly_shaders && xfb_state) { GenerateTransformFeedbackState(*xfb_state); } } @@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); + buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 62f700cf5..c1113e180 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,6 +20,7 @@ namespace OpenGL { +class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>); class GraphicsPipeline { public: - explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, - Tegra::MemoryManager& gpu_memory_, + explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, + BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, OGLProgram program_, @@ -77,6 +78,10 @@ public: } } + [[nodiscard]] bool WritesGlobalMemory() const noexcept { + return writes_global_memory; + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -99,6 +104,9 @@ private: std::array<u32, 5> num_texture_buffers{}; std::array<u32, 5> num_image_buffers{}; + bool use_storage_buffers{}; + bool writes_global_memory{}; + static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; GLsizei num_xfb_strides{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eec01e8c2..5d4e80364 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { EndTransformFeedback(); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute() { - ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; - if (!program) { + ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; + if (!pipeline) { return; } - program->Configure(); + pipeline->Configure(); const auto& qmd{kepler_compute.launch_description}; glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; + has_written_global_memory |= pipeline->WritesGlobalMemory(); } void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { @@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() { // Make sure memory stored from the previous GL command stream is visible // This is only needed on assembly shaders where we write to GPU memory with raw pointers - // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used - // and prefer using NV_shader_storage_buffer_object when possible - if (Settings::values.use_assembly_shaders.GetValue()) { + if (has_written_global_memory) { + has_written_global_memory = false; glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); } glFlush(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index afd43b2ee..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -225,7 +225,8 @@ private: std::array<GLuint, MAX_IMAGES> image_handles{}; /// Number of commands queued to the OpenGL driver. Resetted on flush. - std::size_t num_queued_commands = 0; + size_t num_queued_commands = 0; + bool has_written_global_memory = false; u32 last_clip_distance_mask = 0; }; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3aa5ac31d..287f497b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) { } Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, - const Shader::IR::Program& program) { + const Shader::IR::Program& program, + bool glasm_use_storage_buffers) { Shader::RuntimeInfo info; switch (program.stage) { case Shader::Stage::TessellationEval: @@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, info.input_topology = Shader::InputTopology::TrianglesAdjacency; break; } + info.glasm_use_storage_buffers = glasm_use_storage_buffers; return info; } @@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs, bool build_in_parallel) { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); - size_t env_index{0}; + size_t env_index{}; + u32 total_storage_buffers{}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { if (key.unique_hashes[index] == 0) { @@ -447,7 +450,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); + + for (const auto& desc : programs[index].info.storage_buffers_descriptors) { + total_storage_buffers += desc.count; + } } + const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; + const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; + std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; OGLProgram source_program; @@ -466,7 +476,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( const size_t stage_index{index - 1}; infos[stage_index] = &program.info; - const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; + const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; if (device.UseAssemblyShaders()) { const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); @@ -479,7 +489,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( LinkProgram(source_program.handle); } return std::make_unique<GraphicsPipeline>( - texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, + device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, std::move(source_program), std::move(assembly_programs), infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } @@ -508,10 +518,18 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; + + u32 num_storage_buffers{}; + for (const auto& desc : program.info.storage_buffers_descriptors) { + num_storage_buffers += desc.count; + } + Shader::RuntimeInfo info; + info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + OGLAssemblyProgram asm_program; OGLProgram source_program; if (device.UseAssemblyShaders()) { - const std::string code{EmitGLASM(profile, program)}; + const std::string code{EmitGLASM(profile, info, program)}; asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); } else { const std::vector<u32> code{EmitSPIRV(profile, program)}; @@ -519,7 +537,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& AddShader(GL_COMPUTE_SHADER, source_program.handle, code); LinkProgram(source_program.handle); } - return std::make_unique<ComputePipeline>(texture_cache, buffer_cache, gpu_memory, + return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, kepler_compute, program_manager, program.info, std::move(source_program), std::move(asm_program)); } |