diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-11-13 04:25:52 +0100 |
---|---|---|
committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2019-11-23 01:28:48 +0100 |
commit | 287ae2b9e8ea38642a4c8e36f7863d881d4c0e87 (patch) | |
tree | feb26b3520031dfff59e7cf8e85018ab888cc2fa | |
parent | gl_shader_cache: Specialize shared memory size (diff) | |
download | yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.gz yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.bz2 yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.lz yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.xz yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.zst yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.zip |
6 files changed, 32 insertions, 21 deletions
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index bd49c6627..c526287b7 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -178,7 +178,12 @@ public: BitField<24, 5, u32> gpr_alloc; }; - INSERT_PADDING_WORDS(0x11); + union { + BitField<0, 20, u32> local_crs_alloc; + BitField<24, 5, u32> sass_version; + }; + + INSERT_PADDING_WORDS(0x10); } launch_description{}; struct { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ebfe52e6d..d890076f8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z, launch_desc.shared_alloc); + launch_desc.block_dim_z, launch_desc.shared_alloc, + launch_desc.local_pos_alloc); std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.program_pipeline = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 982c4e23a..b23a982d7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy source += fmt::format("shared uint smem[{}];", Common::AlignUp(variant.shared_memory_size, 4) / 4); } + + if (variant.local_memory_size > 0) { + source += fmt::format("#define LOCAL_MEMORY_SIZE {}", + Common::AlignUp(variant.local_memory_size, 4) / 4); + } } source += '\n'; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index fb2ba0905..fe016c05c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -510,10 +510,14 @@ private: } void DeclareLocalMemory() { - // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at - // specialization time. - const u64 local_memory_size = - stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); + if (stage == ProgramType::Compute) { + code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); + code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); + code.AddLine("#endif"); + return; + } + + const u64 local_memory_size = header.GetLocalMemorySize(); if (local_memory_size == 0) { return; } @@ -851,9 +855,6 @@ private: } if (const auto lmem = std::get_if<LmemNode>(&*node)) { - if (stage == ProgramType::Compute) { - LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); - } return { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; @@ -1228,9 +1229,6 @@ private: } target = std::move(*output); } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { - if (stage == ProgramType::Compute) { - LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); - } target = { fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), Type::Uint}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index d2bb8502a..5ebcbbbba 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -52,11 +52,11 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 8; +constexpr u32 NativeVersion = 9; // Making sure sizes doesn't change by accident static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 32); +static_assert(sizeof(ProgramVariant) == 36); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 6f8e51364..28689f6c7 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -64,10 +64,10 @@ struct ProgramVariant final { : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} /// Compute constructor. - explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, - u32 shared_memory_size) noexcept + explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, + u32 local_memory_size) noexcept : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, - shared_memory_size{shared_memory_size} {} + shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} // Graphics specific parameters. BaseBindings base_bindings{}; @@ -78,12 +78,13 @@ struct ProgramVariant final { u16 block_y{}; u16 block_z{}; u32 shared_memory_size{}; + u32 local_memory_size{}; bool operator==(const ProgramVariant& rhs) const noexcept { return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, - shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode, - rhs.block_x, rhs.block_y, rhs.block_z, - rhs.shared_memory_size); + shared_memory_size, local_memory_size) == + std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, + rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size); } bool operator!=(const ProgramVariant& rhs) const noexcept { @@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> { static_cast<std::size_t>(variant.block_x) ^ (static_cast<std::size_t>(variant.block_y) << 32) ^ (static_cast<std::size_t>(variant.block_z) << 48) ^ - (static_cast<std::size_t>(variant.shared_memory_size) << 16); + (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^ + (static_cast<std::size_t>(variant.local_memory_size) << 36); } }; |