summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: ReinUsesLisp <reinuseslisp@airmail.cc> 2019-11-13 04:25:52 +0100
committer: ReinUsesLisp <reinuseslisp@airmail.cc> 2019-11-23 01:28:48 +0100
commit: 287ae2b9e8ea38642a4c8e36f7863d881d4c0e87 (patch)
tree: feb26b3520031dfff59e7cf8e85018ab888cc2fa
parent: gl_shader_cache: Specialize shared memory size (diff)
download: yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.gz
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.bz2
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.lz
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.xz
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.zst
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.zip
-rw-r--r-- src/video_core/engines/kepler_compute.h 7
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 5
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 18
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.h 16
6 files changed, 32 insertions, 21 deletions
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index bd49c6627..c526287b7 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -178,7 +178,12 @@ public:
BitField<24, 5, u32> gpr_alloc;
};
- INSERT_PADDING_WORDS(0x11);
+ union {
+ BitField<0, 20, u32> local_crs_alloc;
+ BitField<24, 5, u32> sass_version;
+ };
+
+ INSERT_PADDING_WORDS(0x10);
} launch_description{};
struct {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ebfe52e6d..d890076f8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
- launch_desc.block_dim_z, launch_desc.shared_alloc);
+ launch_desc.block_dim_z, launch_desc.shared_alloc,
+ launch_desc.local_pos_alloc);
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
state.draw.program_pipeline = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 982c4e23a..b23a982d7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
source += fmt::format("shared uint smem[{}];",
Common::AlignUp(variant.shared_memory_size, 4) / 4);
}
+
+ if (variant.local_memory_size > 0) { // GLSL: '#' must start its line; smem decl emits no '\n'
+ source += fmt::format("\n#define LOCAL_MEMORY_SIZE {}",
+ Common::AlignUp(variant.local_memory_size, 4) / 4);
+ }
}
source += '\n';
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index fb2ba0905..fe016c05c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -510,10 +510,14 @@ private:
}
void DeclareLocalMemory() {
- // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
- // specialization time.
- const u64 local_memory_size =
- stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
+ if (stage == ProgramType::Compute) {
+ code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
+ code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
+ code.AddLine("#endif");
+ return;
+ }
+
+ const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
@@ -851,9 +855,6 @@ private:
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
- if (stage == ProgramType::Compute) {
- LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
- }
return {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
@@ -1228,9 +1229,6 @@ private:
}
target = std::move(*output);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
- if (stage == ProgramType::Compute) {
- LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
- }
target = {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index d2bb8502a..5ebcbbbba 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -52,11 +52,11 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{};
};
-constexpr u32 NativeVersion = 8;
+constexpr u32 NativeVersion = 9;
// Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 16);
-static_assert(sizeof(ProgramVariant) == 32);
+static_assert(sizeof(ProgramVariant) == 36);
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 6f8e51364..28689f6c7 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -64,10 +64,10 @@ struct ProgramVariant final {
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
/// Compute constructor.
- explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
- u32 shared_memory_size) noexcept
+ explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
+ u32 local_memory_size) noexcept
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
- shared_memory_size{shared_memory_size} {}
+ shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
// Graphics specific parameters.
BaseBindings base_bindings{};
@@ -78,12 +78,13 @@ struct ProgramVariant final {
u16 block_y{};
u16 block_z{};
u32 shared_memory_size{};
+ u32 local_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept {
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
- shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
- rhs.block_x, rhs.block_y, rhs.block_z,
- rhs.shared_memory_size);
+ shared_memory_size, local_memory_size) ==
+ std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
+ rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
}
bool operator!=(const ProgramVariant& rhs) const noexcept {
@@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> {
static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^
(static_cast<std::size_t>(variant.block_z) << 48) ^
- (static_cast<std::size_t>(variant.shared_memory_size) << 16);
+ (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
+ (static_cast<std::size_t>(variant.local_memory_size) << 36);
}
};