diff options
author | ReinUsesLisp <reinuseslisp@airmail.cc> | 2020-02-29 07:49:51 +0100 |
---|---|---|
committer | ReinUsesLisp <reinuseslisp@airmail.cc> | 2020-03-09 22:40:07 +0100 |
commit | 0528be5c92db67b608dc64322c55e57629c80619 (patch) | |
tree | 6705b628f5b7db52300f967f5101d172384eb101 /src/video_core | |
parent | video_core: Rename "const buffer locker" to "registry" (diff) | |
download | yuzu-0528be5c92db67b608dc64322c55e57629c80619.tar yuzu-0528be5c92db67b608dc64322c55e57629c80619.tar.gz yuzu-0528be5c92db67b608dc64322c55e57629c80619.tar.bz2 yuzu-0528be5c92db67b608dc64322c55e57629c80619.tar.lz yuzu-0528be5c92db67b608dc64322c55e57629c80619.tar.xz yuzu-0528be5c92db67b608dc64322c55e57629c80619.tar.zst yuzu-0528be5c92db67b608dc64322c55e57629c80619.zip |
Diffstat (limited to '')
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 28 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 84 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 4 | ||||
-rw-r--r-- | src/video_core/shader/registry.cpp | 59 | ||||
-rw-r--r-- | src/video_core/shader/registry.h | 49 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 9 |
8 files changed, 176 insertions, 75 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 87d25b5a5..72a5dc82a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -166,8 +166,9 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; - auto registry = std::make_shared<Registry>(entry.type, guest_profile); - registry->SetBoundBuffer(entry.bound_buffer); + const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, + entry.graphics_info, entry.compute_info}; + const auto registry = std::make_shared<Registry>(entry.type, info); for (const auto& [address, value] : entry.keys) { const auto [buffer, offset] = address; registry->InsertKey(buffer, offset, value); @@ -184,9 +185,9 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, const ShaderIR& ir, - bool hint_retrievable = false) { + const Registry& registry, bool hint_retrievable = false) { LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); - const std::string glsl = DecompileShader(device, ir, shader_type); + const std::string glsl = DecompileShader(device, ir, registry, shader_type); OGLShader shader; shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); @@ -239,7 +240,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, // if (!code_b.empty()) { // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); // } - auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir); + auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); ShaderDiskCacheEntry entry; entry.type = shader_type; @@ -247,6 +248,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.code_b = std::move(code_b); entry.unique_identifier = params.unique_identifier; entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); entry.keys = registry->GetKeys(); entry.bound_samplers = registry->GetBoundSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers(); @@ -260,16 +262,18 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto registry = - std::make_shared<Registry>(ShaderType::Compute, params.system.GPU().KeplerCompute()); + auto& engine = params.system.GPU().KeplerCompute(); + auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); - auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); + const u64 uid = params.unique_identifier; + auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); ShaderDiskCacheEntry entry; entry.type = ShaderType::Compute; entry.code = std::move(code); - entry.unique_identifier = params.unique_identifier; + entry.unique_identifier = uid; entry.bound_buffer = registry->GetBoundBuffer(); + entry.compute_info = registry->GetComputeInfo(); entry.keys = registry->GetKeys(); entry.bound_samplers = registry->GetBoundSamplers(); entry.bindless_samplers = registry->GetBindlessSamplers(); @@ -331,8 +335,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, return; } const auto& entry = (*transferable)[i]; - const u64 unique_identifier = entry.unique_identifier; - const auto it = find_precompiled(unique_identifier); + const u64 uid = entry.unique_identifier; + const auto it = find_precompiled(uid); const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; const bool is_compute = entry.type == ShaderType::Compute; @@ -350,7 +354,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } if (!program) { // Otherwise compile it from GLSL - program = BuildShader(device, entry.type, unique_identifier, ir, true); + program = BuildShader(device, entry.type, uid, ir, *registry, true); } PrecompiledShader shader; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 308e57aae..48a25f1f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -36,6 +36,7 @@ using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::Register; +using VideoCommon::Shader::Registry; using namespace std::string_literals; using namespace VideoCommon::Shader; @@ -288,6 +289,30 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { } } +/// Describes primitive behavior on geometry shaders +std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return {"points", 1}; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineStrip: + return {"lines", 2}; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + return {"lines_adjacency", 4}; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + return {"triangles", 3}; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + return {"triangles_adjacency", 6}; + default: + UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); + return {"points", 1}; + } +} + /// Generates code to use for a swizzle operation. constexpr const char* GetSwizzle(std::size_t element) { constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; @@ -367,15 +392,17 @@ std::string FlowStackTopName(MetaStackClass stack) { class GLSLDecompiler final { public: - explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, - std::string_view suffix) - : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} + explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view suffix) + : device{device}, ir{ir}, registry{registry}, stage{stage}, suffix{suffix}, + header{ir.GetHeader()} {} void Decompile() { DeclareHeader(); DeclareVertex(); DeclareGeometry(); DeclareFragment(); + DeclareCompute(); DeclareRegisters(); DeclareCustomVariables(); DeclarePredicates(); @@ -489,9 +516,15 @@ private: return; } + const auto& info = registry.GetGraphicsInfo(); + const auto input_topology = info.primitive_topology; + const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); + max_input_vertices = max_vertices; + code.AddLine("layout ({}) in;", glsl_topology); + const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); + const auto max_output_vertices = header.common4.max_output_vertices.Value(); + code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); code.AddNewLine(); code.AddLine("in gl_PerVertex {{"); @@ -513,7 +546,8 @@ private: if (!IsRenderTargetEnabled(render_target)) { continue; } - code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target); + code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, + render_target); any = true; } if (any) { @@ -521,6 +555,20 @@ private: } } + void DeclareCompute() { + if (stage != ShaderType::Compute) { + return; + } + const auto& info = registry.GetComputeInfo(); + if (const u32 size = info.shared_memory_size_in_words; size > 0) { + code.AddLine("shared uint smem[];", size); + code.AddNewLine(); + } + code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", + info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); + code.AddNewLine(); + } + void DeclareVertexRedeclarations() { code.AddLine("out gl_PerVertex {{"); ++code.scope; @@ -596,18 +644,16 @@ private: } void DeclareLocalMemory() { + u64 local_memory_size = 0; if (stage == ShaderType::Compute) { - code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); - code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); - code.AddLine("#endif"); - return; + local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; + } else { + local_memory_size = header.GetLocalMemorySize(); } - - const u64 local_memory_size = header.GetLocalMemorySize(); if (local_memory_size == 0) { return; } - const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; + const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); code.AddNewLine(); } @@ -996,7 +1042,8 @@ private: // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games // set an 0x80000000 index for those and the shader fails to build. Find out why // this happens and what's its intent. - return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); + return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), + max_input_vertices.value()); } return std::string(name); }; @@ -2428,11 +2475,14 @@ private: const Device& device; const ShaderIR& ir; + const Registry& registry; const ShaderType stage; const std::string_view suffix; const Header header; ShaderWriter code; + + std::optional<u32> max_input_vertices; }; std::string GetFlowVariable(u32 index) { @@ -2647,9 +2697,9 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { return entries; } -std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage, - std::string_view suffix) { - GLSLDecompiler decompiler(device, ir, stage, suffix); +std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view suffix) { + GLSLDecompiler decompiler(device, ir, registry, stage, suffix); decompiler.Decompile(); return decompiler.GetResult(); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index ae97ab504..68b68ee77 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -12,12 +12,9 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" -namespace VideoCommon::Shader { -class ShaderIR; -} - namespace OpenGL { class Device; @@ -80,6 +77,7 @@ struct ShaderEntries { ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, Tegra::Engines::ShaderType stage, std::string_view suffix = {}); } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 0e1717c5e..5d5118058 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -48,7 +48,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler; }; -constexpr u32 NativeVersion = 16; +constexpr u32 NativeVersion = 17; ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -83,15 +83,16 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { return false; } - bool is_texture_handler_size_known; + u8 is_texture_handler_size_known; u32 texture_handler_size_value; u32 num_keys; u32 num_bound_samplers; u32 num_bindless_samplers; if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || file.ReadArray(&is_texture_handler_size_known, 1) != 1 || - file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || - file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&texture_handler_size_value, 1) != 1 || + file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || + file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { return false; } @@ -136,8 +137,9 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { } if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || - file.WriteObject(texture_handler_size.has_value()) != 1 || + file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 || file.WriteObject(texture_handler_size.value_or(0)) != 1 || + file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || file.WriteObject(static_cast<u32>(keys.size())) != 1 || file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 7f2ab36be..d5be52e40 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -51,8 +51,10 @@ struct ShaderDiskCacheEntry { ProgramCode code_b; u64 unique_identifier = 0; - u32 bound_buffer = 0; std::optional<u32> texture_handler_size; + u32 bound_buffer = 0; + VideoCommon::Shader::GraphicsInfo graphics_info; + VideoCommon::Shader::ComputeInfo compute_info; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 7126caf98..dc2d3dce3 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -6,21 +6,55 @@ #include <tuple> #include "common/common_types.h" +#include "video_core/engines/kepler_compute.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/shader/registry.h" namespace VideoCommon::Shader { +using Tegra::Engines::ConstBufferEngineInterface; using Tegra::Engines::SamplerDescriptor; +using Tegra::Engines::ShaderType; -Registry::Registry(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile) - : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} +namespace { + +GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage == ShaderType::Compute) { + return {}; + } + auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); + + GraphicsInfo info; + info.primitive_topology = graphics.regs.draw.topology; + return info; +} + +ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage != ShaderType::Compute) { + return {}; + } + auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); + const auto& launch = compute.launch_description; + + ComputeInfo info; + info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; + info.local_memory_size_in_words = launch.local_pos_alloc; + info.shared_memory_size_in_words = launch.shared_alloc; + return info; +} + +} // Anonymous namespace + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) + : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, + bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} Registry::Registry(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine} {} + : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, + graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( + shader_stage, engine)} {} Registry::~Registry() = default; @@ -67,18 +101,6 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler return value; } -std::optional<u32> Registry::ObtainBoundBuffer() { - if (bound_buffer_saved) { - return bound_buffer; - } - if (!engine) { - return std::nullopt; - } - bound_buffer_saved = true; - bound_buffer = engine->GetBoundBuffer(); - return bound_buffer; -} - void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -91,11 +113,6 @@ void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor s bindless_samplers.insert_or_assign({buffer, offset}, sampler); } -void Registry::SetBoundBuffer(u32 buffer) { - bound_buffer_saved = true; - bound_buffer = buffer; -} - bool Registry::IsConsistent() const { if (!engine) { return true; diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index a5487e1d7..c1a04ea02 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -4,11 +4,16 @@ #pragma once +#include <array> #include <optional> +#include <type_traits> #include <unordered_map> +#include <utility> + #include "common/common_types.h" #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/guest_driver.h" @@ -19,6 +24,25 @@ using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescripto using BindlessSamplerMap = std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; +struct GraphicsInfo { + Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; +}; +static_assert(std::is_trivially_copyable_v<GraphicsInfo>); + +struct ComputeInfo { + std::array<u32, 3> workgroup_size{}; + u32 shared_memory_size_in_words = 0; + u32 local_memory_size_in_words = 0; +}; +static_assert(std::is_trivially_copyable_v<ComputeInfo>); + +struct SerializedRegistryInfo { + VideoCore::GuestDriverProfile guest_driver_profile; + u32 bound_buffer = 0; + GraphicsInfo graphics; + ComputeInfo compute; +}; + /** * The Registry is a class use to interface the 3D and compute engines with the shader compiler. * With it, the shader can obtain required data from GPU state and store it for disk shader @@ -26,8 +50,7 @@ using BindlessSamplerMap = */ class Registry { public: - explicit Registry(Tegra::Engines::ShaderType shader_stage, - VideoCore::GuestDriverProfile stored_guest_driver_profile); + explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); explicit Registry(Tegra::Engines::ShaderType shader_stage, Tegra::Engines::ConstBufferEngineInterface& engine); @@ -42,8 +65,6 @@ public: std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); - std::optional<u32> ObtainBoundBuffer(); - /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -53,9 +74,6 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - /// Set the bound buffer for this registry. - void SetBoundBuffer(u32 buffer); - /// Checks keys and samplers against engine's current const buffers. /// Returns true if they are the same value, false otherwise. bool IsConsistent() const; @@ -83,6 +101,18 @@ public: return bound_buffer; } + /// Returns compute information from this shader + const GraphicsInfo& GetGraphicsInfo() const { + ASSERT(stage != Tegra::Engines::ShaderType::Compute); + return graphics_info; + } + + /// Returns compute information from this shader + const ComputeInfo& GetComputeInfo() const { + ASSERT(stage == Tegra::Engines::ShaderType::Compute); + return compute_info; + } + /// Obtains access to the guest driver's profile. VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; @@ -95,8 +125,9 @@ private: KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; - bool bound_buffer_saved{}; - u32 bound_buffer{}; + u32 bound_buffer; + GraphicsInfo graphics_info; + ComputeInfo compute_info; }; } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 831219841..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -81,14 +81,11 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { - const auto bound_buffer = registry.ObtainBoundBuffer(); - if (!bound_buffer) { + const u32 bound_buffer = registry.GetBoundBuffer(); + if (bound_buffer != cbuf->GetIndex()) { return {}; } - if (*bound_buffer != cbuf->GetIndex()) { - return {}; - } - auto pair = DecoupleIndirectRead(*operation); + const auto pair = DecoupleIndirectRead(*operation); if (!pair) { return {}; } |