diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 126 | ||||
-rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 103 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.cpp | 13 | ||||
-rw-r--r-- | src/video_core/shader/control_flow.h | 3 | ||||
-rw-r--r-- | src/video_core/shader/decode.cpp | 22 | ||||
-rw-r--r-- | src/video_core/shader/decode/texture.cpp | 5 | ||||
-rw-r--r-- | src/video_core/shader/registry.cpp | 161 | ||||
-rw-r--r-- | src/video_core/shader/registry.h | 137 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.cpp | 5 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 6 | ||||
-rw-r--r-- | src/video_core/shader/track.cpp | 18 | ||||
-rw-r--r-- | src/video_core/shader/transform_feedback.cpp | 115 | ||||
-rw-r--r-- | src/video_core/shader/transform_feedback.h | 23 |
13 files changed, 467 insertions, 270 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp deleted file mode 100644 index 0638be8cb..000000000 --- a/src/video_core/shader/const_buffer_locker.cpp +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <tuple> - -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/shader/const_buffer_locker.h" - -namespace VideoCommon::Shader { - -using Tegra::Engines::SamplerDescriptor; - -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) - : stage{shader_stage} {} - -ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine} {} - -ConstBufferLocker::~ConstBufferLocker() = default; - -std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { - const std::pair<u32, u32> key = {buffer, offset}; - const auto iter = keys.find(key); - if (iter != keys.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); - keys.emplace(key, value); - return value; -} - -std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { - const u32 key = offset; - const auto iter = bound_samplers.find(key); - if (iter != bound_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); - bound_samplers.emplace(key, value); - return value; -} - -std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( - u32 buffer, u32 offset) { - const std::pair key = {buffer, offset}; - const auto iter = bindless_samplers.find(key); - if (iter != bindless_samplers.end()) { - return iter->second; - } - if (!engine) { - return std::nullopt; - } - const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); - bindless_samplers.emplace(key, value); - return value; -} - -std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { - if (bound_buffer_saved) { - return bound_buffer; - } - if (!engine) { - return std::nullopt; - } - bound_buffer_saved = true; - bound_buffer = engine->GetBoundBuffer(); - return bound_buffer; -} - -void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { - keys.insert_or_assign({buffer, offset}, value); -} - -void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { - bound_samplers.insert_or_assign(offset, sampler); -} - -void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { - bindless_samplers.insert_or_assign({buffer, offset}, sampler); -} - -void ConstBufferLocker::SetBoundBuffer(u32 buffer) { - bound_buffer_saved = true; - bound_buffer = buffer; -} - -bool ConstBufferLocker::IsConsistent() const { - if (!engine) { - return false; - } - return std::all_of(keys.begin(), keys.end(), - [this](const auto& pair) { - const auto [cbuf, offset] = pair.first; - const auto value = pair.second; - return value == engine->AccessConstBuffer32(stage, cbuf, offset); - }) && - std::all_of(bound_samplers.begin(), bound_samplers.end(), - [this](const auto& sampler) { - const auto [key, value] = sampler; - return value == engine->AccessBoundSampler(stage, key); - }) && - std::all_of(bindless_samplers.begin(), bindless_samplers.end(), - [this](const auto& sampler) { - const auto [cbuf, offset] = sampler.first; - const auto value = sampler.second; - return value == engine->AccessBindlessSampler(stage, cbuf, offset); - }); -} - -bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { - return std::tie(keys, bound_samplers, bindless_samplers) == - std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); -} - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h deleted file mode 100644 index d3ea11087..000000000 --- a/src/video_core/shader/const_buffer_locker.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <optional> -#include <unordered_map> -#include "common/common_types.h" -#include "common/hash.h" -#include "video_core/engines/const_buffer_engine_interface.h" -#include "video_core/engines/shader_type.h" -#include "video_core/guest_driver.h" - -namespace VideoCommon::Shader { - -using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; -using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; -using BindlessSamplerMap = - std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; - -/** - * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader - * compiler. with it, the shader can obtain required data from GPU state and store it for disk - * shader compilation. - */ -class ConstBufferLocker { -public: - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); - - explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine); - - ~ConstBufferLocker(); - - /// Retrieves a key from the locker, if it's registered, it will give the registered value, if - /// not it will obtain it from maxwell3d and register it. - std::optional<u32> ObtainKey(u32 buffer, u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); - - std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); - - std::optional<u32> ObtainBoundBuffer(); - - /// Inserts a key. - void InsertKey(u32 buffer, u32 offset, u32 value); - - /// Inserts a bound sampler key. - void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Inserts a bindless sampler key. - void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); - - /// Set the bound buffer for this locker. - void SetBoundBuffer(u32 buffer); - - /// Checks keys and samplers against engine's current const buffers. Returns true if they are - /// the same value, false otherwise; - bool IsConsistent() const; - - /// Returns true if the keys are equal to the other ones in the locker. - bool HasEqualKeys(const ConstBufferLocker& rhs) const; - - /// Gives an getter to the const buffer keys in the database. - const KeyMap& GetKeys() const { - return keys; - } - - /// Gets samplers database. - const BoundSamplerMap& GetBoundSamplers() const { - return bound_samplers; - } - - /// Gets bindless samplers database. - const BindlessSamplerMap& GetBindlessSamplers() const { - return bindless_samplers; - } - - /// Gets bound buffer used on this shader - u32 GetBoundBuffer() const { - return bound_buffer; - } - - /// Obtains access to the guest driver's profile. - VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { - if (engine) { - return &engine->AccessGuestDriverProfile(); - } - return nullptr; - } - -private: - const Tegra::Engines::ShaderType stage; - Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; - KeyMap keys; - BoundSamplerMap bound_samplers; - BindlessSamplerMap bindless_samplers; - bool bound_buffer_saved{}; - u32 bound_buffer{}; -}; - -} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 0229733b6..2e2711350 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -13,6 +13,7 @@ #include "common/common_types.h" #include "video_core/shader/ast.h" #include "video_core/shader/control_flow.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -64,11 +65,11 @@ struct BlockInfo { }; struct CFGRebuildState { - explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) - : program_code{program_code}, locker{locker}, start{start} {} + explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) + : program_code{program_code}, registry{registry}, start{start} {} const ProgramCode& program_code; - ConstBufferLocker& locker; + Registry& registry; u32 start{}; std::vector<BlockInfo> block_info; std::list<u32> inspect_queries; @@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) const s32 pc_target = offset + result.relative_position; std::vector<CaseBranch> branches; for (u32 i = 0; i < result.entries; i++) { - auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); + auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); if (!key) { return {ParseResult::AbnormalFlow, parse_info}; } @@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) { std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, - ConstBufferLocker& locker) { + Registry& registry) { auto result_out = std::make_unique<ShaderCharacteristics>(); if (settings.depth == CompileDepth::BruteForce) { result_out->settings.depth = CompileDepth::BruteForce; return result_out; } - CFGRebuildState state{program_code, start_address, locker}; + CFGRebuildState state{program_code, start_address, registry}; // Inspect Code and generate blocks state.labels.clear(); state.labels.emplace(start_address); diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5304998b9..62a3510d8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h @@ -12,6 +12,7 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -111,6 +112,6 @@ struct ShaderCharacteristics { std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, const CompilerSettings& settings, - ConstBufferLocker& locker); + Registry& registry); } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..87ac9ac6c 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, const std::list<Sampler>& used_samplers) { - if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); - return; - } - if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { return; } u32 count{}; @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, bound_offsets.emplace_back(sampler.GetOffset()); } if (count > 1) { - gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); } } std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, - VideoCore::GuestDriverProfile* gpu_driver, + VideoCore::GuestDriverProfile& gpu_driver, const std::list<Sampler>& used_samplers) { - if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); - return std::nullopt; - } const u32 base_offset = sampler_to_deduce.GetOffset(); u32 max_offset{std::numeric_limits<u32>::max()}; for (const auto& sampler : used_samplers) { @@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, if (max_offset == std::numeric_limits<u32>::max()) { return std::nullopt; } - return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); + return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); } } // Anonymous namespace @@ -149,7 +141,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); decompiled = false; - auto info = ScanFlow(program_code, main_offset, settings, locker); + auto info = ScanFlow(program_code, main_offset, settings, registry); auto& shader_info = *info; coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { void ShaderIR::PostDecode() { // Deduce texture handler size if needed - auto gpu_driver = locker.AccessGuestDriverProfile(); + auto gpu_driver = registry.AccessGuestDriverProfile(); DeduceTextureHandlerSize(gpu_driver, used_samplers); // Deduce Indexed Samplers if (!uses_indexed_samplers) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bee7d8cad..48350e042 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -12,6 +12,7 @@ #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample if (sampler_info) { return *sampler_info; } - const auto sampler = - buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); + const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) + : registry.ObtainBoundSampler(offset); if (!sampler) { LOG_WARNING(HW_GPU, "Unknown sampler info"); return SamplerInfo{TextureType::Texture2D, false, false, false}; diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp new file mode 100644 index 000000000..af70b3f35 --- /dev/null +++ b/src/video_core/shader/registry.cpp @@ -0,0 +1,161 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <tuple> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" + +namespace VideoCommon::Shader { + +using Tegra::Engines::ConstBufferEngineInterface; +using Tegra::Engines::SamplerDescriptor; +using Tegra::Engines::ShaderType; + +namespace { + +GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage == ShaderType::Compute) { + return {}; + } + auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); + + GraphicsInfo info; + info.tfb_layouts = graphics.regs.tfb_layouts; + info.tfb_varying_locs = graphics.regs.tfb_varying_locs; + info.primitive_topology = graphics.regs.draw.topology; + info.tessellation_primitive = graphics.regs.tess_mode.prim; + info.tessellation_spacing = graphics.regs.tess_mode.spacing; + info.tfb_enabled = graphics.regs.tfb_enabled; + info.tessellation_clockwise = graphics.regs.tess_mode.cw; + return info; +} + +ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { + if (shader_stage != ShaderType::Compute) { + return {}; + } + auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); + const auto& launch = compute.launch_description; + + ComputeInfo info; + info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; + info.local_memory_size_in_words = launch.local_pos_alloc; + info.shared_memory_size_in_words = launch.shared_alloc; + return info; +} + +} // Anonymous namespace + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) + : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, + bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} + +Registry::Registry(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine) + : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, + graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( + shader_stage, engine)} {} + +Registry::~Registry() = default; + +std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { + const std::pair<u32, u32> key = {buffer, offset}; + const auto iter = keys.find(key); + if (iter != keys.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); + keys.emplace(key, value); + return value; +} + +std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { + const u32 key = offset; + const auto iter = bound_samplers.find(key); + if (iter != bound_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); + bound_samplers.emplace(key, value); + return value; +} + +std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, + u32 offset) { + const std::pair key = {buffer, offset}; + const auto iter = bindless_samplers.find(key); + if (iter != bindless_samplers.end()) { + return iter->second; + } + if (!engine) { + return std::nullopt; + } + const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); + bindless_samplers.emplace(key, value); + return value; +} + +void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { + keys.insert_or_assign({buffer, offset}, value); +} + +void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { + bound_samplers.insert_or_assign(offset, sampler); +} + +void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { + bindless_samplers.insert_or_assign({buffer, offset}, sampler); +} + +bool Registry::IsConsistent() const { + if (!engine) { + return true; + } + return std::all_of(keys.begin(), keys.end(), + [this](const auto& pair) { + const auto [cbuf, offset] = pair.first; + const auto value = pair.second; + return value == engine->AccessConstBuffer32(stage, cbuf, offset); + }) && + std::all_of(bound_samplers.begin(), bound_samplers.end(), + [this](const auto& sampler) { + const auto [key, value] = sampler; + return value == engine->AccessBoundSampler(stage, key); + }) && + std::all_of(bindless_samplers.begin(), bindless_samplers.end(), + [this](const auto& sampler) { + const auto [cbuf, offset] = sampler.first; + const auto value = sampler.second; + return value == engine->AccessBindlessSampler(stage, cbuf, offset); + }); +} + +bool Registry::HasEqualKeys(const Registry& rhs) const { + return std::tie(keys, bound_samplers, bindless_samplers) == + std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); +} + +const GraphicsInfo& Registry::GetGraphicsInfo() const { + ASSERT(stage != Tegra::Engines::ShaderType::Compute); + return graphics_info; +} + +const ComputeInfo& Registry::GetComputeInfo() const { + ASSERT(stage == Tegra::Engines::ShaderType::Compute); + return compute_info; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h new file mode 100644 index 000000000..0c80d35fd --- /dev/null +++ b/src/video_core/shader/registry.h @@ -0,0 +1,137 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <optional> +#include <type_traits> +#include <unordered_map> +#include <utility> + +#include "common/common_types.h" +#include "common/hash.h" +#include "video_core/engines/const_buffer_engine_interface.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" + +namespace VideoCommon::Shader { + +using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; +using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; +using BindlessSamplerMap = + std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; + +struct GraphicsInfo { + using Maxwell = Tegra::Engines::Maxwell3D::Regs; + + std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> + tfb_layouts{}; + std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; + Maxwell::PrimitiveTopology primitive_topology{}; + Maxwell::TessellationPrimitive tessellation_primitive{}; + Maxwell::TessellationSpacing tessellation_spacing{}; + bool tfb_enabled = false; + bool tessellation_clockwise = false; +}; +static_assert(std::is_trivially_copyable_v<GraphicsInfo> && + std::is_standard_layout_v<GraphicsInfo>); + +struct ComputeInfo { + std::array<u32, 3> workgroup_size{}; + u32 shared_memory_size_in_words = 0; + u32 local_memory_size_in_words = 0; +}; +static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); + +struct SerializedRegistryInfo { + VideoCore::GuestDriverProfile guest_driver_profile; + u32 bound_buffer = 0; + GraphicsInfo graphics; + ComputeInfo compute; +}; + +/** + * The Registry is a class use to interface the 3D and compute engines with the shader compiler. + * With it, the shader can obtain required data from GPU state and store it for disk shader + * compilation. + */ +class Registry { +public: + explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); + + explicit Registry(Tegra::Engines::ShaderType shader_stage, + Tegra::Engines::ConstBufferEngineInterface& engine); + + ~Registry(); + + /// Retrieves a key from the registry, if it's registered, it will give the registered value, if + /// not it will obtain it from maxwell3d and register it. + std::optional<u32> ObtainKey(u32 buffer, u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); + + std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); + + /// Inserts a key. + void InsertKey(u32 buffer, u32 offset, u32 value); + + /// Inserts a bound sampler key. + void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Inserts a bindless sampler key. + void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + + /// Checks keys and samplers against engine's current const buffers. + /// Returns true if they are the same value, false otherwise. + bool IsConsistent() const; + + /// Returns true if the keys are equal to the other ones in the registry. + bool HasEqualKeys(const Registry& rhs) const; + + /// Returns graphics information from this shader + const GraphicsInfo& GetGraphicsInfo() const; + + /// Returns compute information from this shader + const ComputeInfo& GetComputeInfo() const; + + /// Gives an getter to the const buffer keys in the database. + const KeyMap& GetKeys() const { + return keys; + } + + /// Gets samplers database. + const BoundSamplerMap& GetBoundSamplers() const { + return bound_samplers; + } + + /// Gets bindless samplers database. + const BindlessSamplerMap& GetBindlessSamplers() const { + return bindless_samplers; + } + + /// Gets bound buffer used on this shader + u32 GetBoundBuffer() const { + return bound_buffer; + } + + /// Obtains access to the guest driver's profile. + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { + return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; + } + +private: + const Tegra::Engines::ShaderType stage; + VideoCore::GuestDriverProfile stored_guest_driver_profile; + Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; + KeyMap keys; + BoundSamplerMap bound_samplers; + BindlessSamplerMap bindless_samplers; + u32 bound_buffer; + GraphicsInfo graphics_info; + ComputeInfo compute_info; +}; + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 3a5d280a9..425927777 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -11,6 +11,7 @@ #include "common/logging/log.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation; using Tegra::Shader::Register; ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - ConstBufferLocker& locker) - : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { + Registry& registry) + : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { Decode(); PostDecode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b0851c3be..dde036b40 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -18,8 +18,8 @@ #include "video_core/engines/shader_header.h" #include "video_core/shader/ast.h" #include "video_core/shader/compiler_settings.h" -#include "video_core/shader/const_buffer_locker.h" #include "video_core/shader/node.h" +#include "video_core/shader/registry.h" namespace VideoCommon::Shader { @@ -69,7 +69,7 @@ struct GlobalMemoryUsage { class ShaderIR final { public: explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, - ConstBufferLocker& locker); + Registry& registry); ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { @@ -414,7 +414,7 @@ private: const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; - ConstBufferLocker& locker; + Registry& registry; bool decompiled{}; bool disable_flow_stack{}; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 15e22b9fa..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { - auto bound_buffer = locker.ObtainBoundBuffer(); - if (!bound_buffer) { + const u32 bound_buffer = registry.GetBoundBuffer(); + if (bound_buffer != cbuf->GetIndex()) { return {}; } - if (*bound_buffer != cbuf->GetIndex()) { - return {}; - } - auto pair = DecoupleIndirectRead(*operation); + const auto pair = DecoupleIndirectRead(*operation); if (!pair) { return {}; } auto [gpr, base_offset] = *pair; const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); - auto gpu_driver = locker.AccessGuestDriverProfile(); - if (gpu_driver == nullptr) { - return {}; - } + const auto& gpu_driver = registry.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); - const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, - Immediate(gpu_driver->GetTextureHandlerSize())); + const Node op = + Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); const Node cv_node = GetCustomVariable(bindless_cv); Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp new file mode 100644 index 000000000..22a933761 --- /dev/null +++ b/src/video_core/shader/transform_feedback.cpp @@ -0,0 +1,115 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <unordered_map> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/shader/registry.h" +#include "video_core/shader/transform_feedback.h" + +namespace VideoCommon::Shader { + +namespace { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 + +/// Attribute offsets that describe a vector +constexpr std::array VECTORS = { + 28, // gl_Position + 32, // Generic 0 + 36, // Generic 1 + 40, // Generic 2 + 44, // Generic 3 + 48, // Generic 4 + 52, // Generic 5 + 56, // Generic 6 + 60, // Generic 7 + 64, // Generic 8 + 68, // Generic 9 + 72, // Generic 10 + 76, // Generic 11 + 80, // Generic 12 + 84, // Generic 13 + 88, // Generic 14 + 92, // Generic 15 + 96, // Generic 16 + 100, // Generic 17 + 104, // Generic 18 + 108, // Generic 19 + 112, // Generic 20 + 116, // Generic 21 + 120, // Generic 22 + 124, // Generic 23 + 128, // Generic 24 + 132, // Generic 25 + 136, // Generic 26 + 140, // Generic 27 + 144, // Generic 28 + 148, // Generic 29 + 152, // Generic 30 + 156, // Generic 31 + 160, // gl_FrontColor + 164, // gl_FrontSecondaryColor + 160, // gl_BackColor + 164, // gl_BackSecondaryColor + 192, // gl_TexCoord[0] + 196, // gl_TexCoord[1] + 200, // gl_TexCoord[2] + 204, // gl_TexCoord[3] + 208, // gl_TexCoord[4] + 212, // gl_TexCoord[5] + 216, // gl_TexCoord[6] + 220, // gl_TexCoord[7] +}; +} // namespace + +std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { + + std::unordered_map<u8, VaryingTFB> tfb; + + for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { + const auto& locations = info.tfb_varying_locs[buffer]; + const auto& layout = info.tfb_layouts[buffer]; + const std::size_t varying_count = layout.varying_count; + + std::size_t highest = 0; + + for (std::size_t offset = 0; offset < varying_count; ++offset) { + const std::size_t base_offset = offset; + const u8 location = locations[offset]; + + VaryingTFB varying; + varying.buffer = layout.stream; + varying.stride = layout.stride; + varying.offset = offset * sizeof(u32); + varying.components = 1; + + if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { + UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); + + const u8 base_index = location / 4; + while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { + ++offset; + ++varying.components; + } + } + + [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; + UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); + + highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); + } + + UNIMPLEMENTED_IF(highest != layout.stride); + } + return tfb; +} + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h new file mode 100644 index 000000000..77d05f64c --- /dev/null +++ b/src/video_core/shader/transform_feedback.h @@ -0,0 +1,23 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <unordered_map> + +#include "common/common_types.h" +#include "video_core/shader/registry.h" + +namespace VideoCommon::Shader { + +struct VaryingTFB { + std::size_t buffer; + std::size_t stride; + std::size_t offset; + std::size_t components; +}; + +std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); + +} // namespace VideoCommon::Shader |