summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp126
-rw-r--r--src/video_core/shader/const_buffer_locker.h103
-rw-r--r--src/video_core/shader/control_flow.cpp13
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp22
-rw-r--r--src/video_core/shader/decode/bfe.cpp69
-rw-r--r--src/video_core/shader/decode/texture.cpp5
-rw-r--r--src/video_core/shader/node_helper.cpp2
-rw-r--r--src/video_core/shader/registry.cpp161
-rw-r--r--src/video_core/shader/registry.h137
-rw-r--r--src/video_core/shader/shader_ir.cpp5
-rw-r--r--src/video_core/shader/shader_ir.h6
-rw-r--r--src/video_core/shader/track.cpp18
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
15 files changed, 517 insertions, 291 deletions
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
deleted file mode 100644
index 0638be8cb..000000000
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <tuple>
-
-#include "common/common_types.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/shader/const_buffer_locker.h"
-
-namespace VideoCommon::Shader {
-
-using Tegra::Engines::SamplerDescriptor;
-
-ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
- : stage{shader_stage} {}
-
-ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine)
- : stage{shader_stage}, engine{&engine} {}
-
-ConstBufferLocker::~ConstBufferLocker() = default;
-
-std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
- const std::pair<u32, u32> key = {buffer, offset};
- const auto iter = keys.find(key);
- if (iter != keys.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
- keys.emplace(key, value);
- return value;
-}
-
-std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
- const u32 key = offset;
- const auto iter = bound_samplers.find(key);
- if (iter != bound_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
- bound_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
- u32 buffer, u32 offset) {
- const std::pair key = {buffer, offset};
- const auto iter = bindless_samplers.find(key);
- if (iter != bindless_samplers.end()) {
- return iter->second;
- }
- if (!engine) {
- return std::nullopt;
- }
- const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
- bindless_samplers.emplace(key, value);
- return value;
-}
-
-std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
- if (bound_buffer_saved) {
- return bound_buffer;
- }
- if (!engine) {
- return std::nullopt;
- }
- bound_buffer_saved = true;
- bound_buffer = engine->GetBoundBuffer();
- return bound_buffer;
-}
-
-void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
- keys.insert_or_assign({buffer, offset}, value);
-}
-
-void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
- bound_samplers.insert_or_assign(offset, sampler);
-}
-
-void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
- bindless_samplers.insert_or_assign({buffer, offset}, sampler);
-}
-
-void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
- bound_buffer_saved = true;
- bound_buffer = buffer;
-}
-
-bool ConstBufferLocker::IsConsistent() const {
- if (!engine) {
- return false;
- }
- return std::all_of(keys.begin(), keys.end(),
- [this](const auto& pair) {
- const auto [cbuf, offset] = pair.first;
- const auto value = pair.second;
- return value == engine->AccessConstBuffer32(stage, cbuf, offset);
- }) &&
- std::all_of(bound_samplers.begin(), bound_samplers.end(),
- [this](const auto& sampler) {
- const auto [key, value] = sampler;
- return value == engine->AccessBoundSampler(stage, key);
- }) &&
- std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
- [this](const auto& sampler) {
- const auto [cbuf, offset] = sampler.first;
- const auto value = sampler.second;
- return value == engine->AccessBindlessSampler(stage, cbuf, offset);
- });
-}
-
-bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
- return std::tie(keys, bound_samplers, bindless_samplers) ==
- std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
-}
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
deleted file mode 100644
index d3ea11087..000000000
--- a/src/video_core/shader/const_buffer_locker.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <optional>
-#include <unordered_map>
-#include "common/common_types.h"
-#include "common/hash.h"
-#include "video_core/engines/const_buffer_engine_interface.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/guest_driver.h"
-
-namespace VideoCommon::Shader {
-
-using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
-using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
-using BindlessSamplerMap =
- std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
-
-/**
- * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
- * compiler. with it, the shader can obtain required data from GPU state and store it for disk
- * shader compilation.
- */
-class ConstBufferLocker {
-public:
- explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
-
- explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine);
-
- ~ConstBufferLocker();
-
- /// Retrieves a key from the locker, if it's registered, it will give the registered value, if
- /// not it will obtain it from maxwell3d and register it.
- std::optional<u32> ObtainKey(u32 buffer, u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
-
- std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
-
- std::optional<u32> ObtainBoundBuffer();
-
- /// Inserts a key.
- void InsertKey(u32 buffer, u32 offset, u32 value);
-
- /// Inserts a bound sampler key.
- void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Inserts a bindless sampler key.
- void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
-
- /// Set the bound buffer for this locker.
- void SetBoundBuffer(u32 buffer);
-
- /// Checks keys and samplers against engine's current const buffers. Returns true if they are
- /// the same value, false otherwise;
- bool IsConsistent() const;
-
- /// Returns true if the keys are equal to the other ones in the locker.
- bool HasEqualKeys(const ConstBufferLocker& rhs) const;
-
- /// Gives an getter to the const buffer keys in the database.
- const KeyMap& GetKeys() const {
- return keys;
- }
-
- /// Gets samplers database.
- const BoundSamplerMap& GetBoundSamplers() const {
- return bound_samplers;
- }
-
- /// Gets bindless samplers database.
- const BindlessSamplerMap& GetBindlessSamplers() const {
- return bindless_samplers;
- }
-
- /// Gets bound buffer used on this shader
- u32 GetBoundBuffer() const {
- return bound_buffer;
- }
-
- /// Obtains access to the guest driver's profile.
- VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
- if (engine) {
- return &engine->AccessGuestDriverProfile();
- }
- return nullptr;
- }
-
-private:
- const Tegra::Engines::ShaderType stage;
- Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
- KeyMap keys;
- BoundSamplerMap bound_samplers;
- BindlessSamplerMap bindless_samplers;
- bool bound_buffer_saved{};
- u32 bound_buffer{};
-};
-
-} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 0229733b6..2e2711350 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
#include "common/common_types.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/control_flow.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
};
struct CFGRebuildState {
- explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
- : program_code{program_code}, locker{locker}, start{start} {}
+ explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
+ : program_code{program_code}, registry{registry}, start{start} {}
const ProgramCode& program_code;
- ConstBufferLocker& locker;
+ Registry& registry;
u32 start{};
std::vector<BlockInfo> block_info;
std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
const s32 pc_target = offset + result.relative_position;
std::vector<CaseBranch> branches;
for (u32 i = 0; i < result.entries; i++) {
- auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
if (!key) {
return {ParseResult::AbnormalFlow, parse_info};
}
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
- ConstBufferLocker& locker) {
+ Registry& registry) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
- CFGRebuildState state{program_code, start_address, locker};
+ CFGRebuildState state{program_code, start_address, registry};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5304998b9..62a3510d8 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -12,6 +12,7 @@
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
const CompilerSettings& settings,
- ConstBufferLocker& locker);
+ Registry& registry);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6b697ed5d..87ac9ac6c 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
return (absolute_offset % SchedPeriod) == 0;
}
-void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
+void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
- if (gpu_driver == nullptr) {
- LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
- return;
- }
- if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+ if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
return;
}
u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
bound_offsets.emplace_back(sampler.GetOffset());
}
if (count > 1) {
- gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
+ gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
}
}
std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
- VideoCore::GuestDriverProfile* gpu_driver,
+ VideoCore::GuestDriverProfile& gpu_driver,
const std::list<Sampler>& used_samplers) {
- if (gpu_driver == nullptr) {
- LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
- return std::nullopt;
- }
const u32 base_offset = sampler_to_deduce.GetOffset();
u32 max_offset{std::numeric_limits<u32>::max()};
for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
if (max_offset == std::numeric_limits<u32>::max()) {
return std::nullopt;
}
- return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
+ return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
}
} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
- auto info = ScanFlow(program_code, main_offset, settings, locker);
+ auto info = ScanFlow(program_code, main_offset, settings, registry);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
void ShaderIR::PostDecode() {
// Deduce texture handler size if needed
- auto gpu_driver = locker.AccessGuestDriverProfile();
+ auto gpu_driver = registry.AccessGuestDriverProfile();
DeduceTextureHandlerSize(gpu_driver, used_samplers);
// Deduce Indexed Samplers
if (!uses_indexed_samplers) {
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- UNIMPLEMENTED_IF(instr.bfe.negate_b);
-
Node op_a = GetRegister(instr.gpr8);
- op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
-
- switch (opcode->get().GetId()) {
- case OpCode::Id::BFE_IMM: {
- UNIMPLEMENTED_IF_MSG(instr.generates_cc,
- "Condition codes generation in BFE is not implemented");
+ Node op_b = [&] {
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::BFE_R:
+ return GetRegister(instr.gpr20);
+ case OpCode::Id::BFE_C:
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
+ case OpCode::Id::BFE_IMM:
+ return Immediate(instr.alu.GetSignedImm20_20());
+ default:
+ UNREACHABLE();
+ return Immediate(0);
+ }
+ }();
- const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
- const Node outer_shift_imm =
- Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
+ UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
- const Node inner_shift =
- Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
- const Node outer_shift =
- Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
+ const bool is_signed = instr.bfe.is_signed;
- SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
- SetRegister(bb, instr.gpr0, outer_shift);
- break;
- }
- default:
- UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
+ // using reverse parallel method in
+ // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+ // note for later if possible to implement faster method.
+ if (instr.bfe.brev) {
+ const auto swap = [&](u32 s, u32 mask) {
+ Node v1 =
+ SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
+ if (mask != 0) {
+ v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
+ Immediate(mask));
+ }
+ Node v2 = op_a;
+ if (mask != 0) {
+ v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
+ Immediate(mask));
+ }
+ v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
+ Immediate(s));
+ return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
+ std::move(v2));
+ };
+ op_a = swap(1, 0x55555555U);
+ op_a = swap(2, 0x33333333U);
+ op_a = swap(4, 0x0F0F0F0FU);
+ op_a = swap(8, 0x00FF00FFU);
+ op_a = swap(16, 0);
}
+ const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(0), Immediate(8));
+ const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
+ Immediate(8), Immediate(8));
+ auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
+ SetRegister(bb, instr.gpr0, std::move(result));
+
return pc;
}
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bee7d8cad..48350e042 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -12,6 +12,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
if (sampler_info) {
return *sampler_info;
}
- const auto sampler =
- buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset);
+ const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
+ : registry.ObtainBoundSampler(offset);
if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info");
return SamplerInfo{TextureType::Texture2D, false, false, false};
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
return OperationCode::UBitwiseXor;
case OperationCode::IBitwiseNot:
return OperationCode::UBitwiseNot;
+ case OperationCode::IBitfieldExtract:
+ return OperationCode::UBitfieldExtract;
case OperationCode::IBitfieldInsert:
return OperationCode::UBitfieldInsert;
case OperationCode::IBitCount:
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
new file mode 100644
index 000000000..af70b3f35
--- /dev/null
+++ b/src/video_core/shader/registry.cpp
@@ -0,0 +1,161 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <tuple>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::ConstBufferEngineInterface;
+using Tegra::Engines::SamplerDescriptor;
+using Tegra::Engines::ShaderType;
+
+namespace {
+
+GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
+ if (shader_stage == ShaderType::Compute) {
+ return {};
+ }
+ auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+ GraphicsInfo info;
+ info.tfb_layouts = graphics.regs.tfb_layouts;
+ info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
+ info.primitive_topology = graphics.regs.draw.topology;
+ info.tessellation_primitive = graphics.regs.tess_mode.prim;
+ info.tessellation_spacing = graphics.regs.tess_mode.spacing;
+ info.tfb_enabled = graphics.regs.tfb_enabled;
+ info.tessellation_clockwise = graphics.regs.tess_mode.cw;
+ return info;
+}
+
+ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
+ if (shader_stage != ShaderType::Compute) {
+ return {};
+ }
+ auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+ const auto& launch = compute.launch_description;
+
+ ComputeInfo info;
+ info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
+ info.local_memory_size_in_words = launch.local_pos_alloc;
+ info.shared_memory_size_in_words = launch.shared_alloc;
+ return info;
+}
+
+} // Anonymous namespace
+
+Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+ : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
+ bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
+
+Registry::Registry(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine)
+ : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
+ graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
+ shader_stage, engine)} {}
+
+Registry::~Registry() = default;
+
+std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
+ const std::pair<u32, u32> key = {buffer, offset};
+ const auto iter = keys.find(key);
+ if (iter != keys.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
+ keys.emplace(key, value);
+ return value;
+}
+
+std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
+ const u32 key = offset;
+ const auto iter = bound_samplers.find(key);
+ if (iter != bound_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
+ bound_samplers.emplace(key, value);
+ return value;
+}
+
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
+ u32 offset) {
+ const std::pair key = {buffer, offset};
+ const auto iter = bindless_samplers.find(key);
+ if (iter != bindless_samplers.end()) {
+ return iter->second;
+ }
+ if (!engine) {
+ return std::nullopt;
+ }
+ const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
+ bindless_samplers.emplace(key, value);
+ return value;
+}
+
+void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
+ keys.insert_or_assign({buffer, offset}, value);
+}
+
+void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+ bound_samplers.insert_or_assign(offset, sampler);
+}
+
+void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+ bindless_samplers.insert_or_assign({buffer, offset}, sampler);
+}
+
+bool Registry::IsConsistent() const {
+ if (!engine) {
+ return true;
+ }
+ return std::all_of(keys.begin(), keys.end(),
+ [this](const auto& pair) {
+ const auto [cbuf, offset] = pair.first;
+ const auto value = pair.second;
+ return value == engine->AccessConstBuffer32(stage, cbuf, offset);
+ }) &&
+ std::all_of(bound_samplers.begin(), bound_samplers.end(),
+ [this](const auto& sampler) {
+ const auto [key, value] = sampler;
+ return value == engine->AccessBoundSampler(stage, key);
+ }) &&
+ std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
+ [this](const auto& sampler) {
+ const auto [cbuf, offset] = sampler.first;
+ const auto value = sampler.second;
+ return value == engine->AccessBindlessSampler(stage, cbuf, offset);
+ });
+}
+
+bool Registry::HasEqualKeys(const Registry& rhs) const {
+ return std::tie(keys, bound_samplers, bindless_samplers) ==
+ std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
+}
+
+const GraphicsInfo& Registry::GetGraphicsInfo() const {
+ ASSERT(stage != Tegra::Engines::ShaderType::Compute);
+ return graphics_info;
+}
+
+const ComputeInfo& Registry::GetComputeInfo() const {
+ ASSERT(stage == Tegra::Engines::ShaderType::Compute);
+ return compute_info;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
new file mode 100644
index 000000000..0c80d35fd
--- /dev/null
+++ b/src/video_core/shader/registry.h
@@ -0,0 +1,137 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <optional>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
+
+namespace VideoCommon::Shader {
+
+using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
+using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using BindlessSamplerMap =
+ std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
+
+struct GraphicsInfo {
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+ std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
+ tfb_layouts{};
+ std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
+ Maxwell::PrimitiveTopology primitive_topology{};
+ Maxwell::TessellationPrimitive tessellation_primitive{};
+ Maxwell::TessellationSpacing tessellation_spacing{};
+ bool tfb_enabled = false;
+ bool tessellation_clockwise = false;
+};
+static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
+ std::is_standard_layout_v<GraphicsInfo>);
+
+struct ComputeInfo {
+ std::array<u32, 3> workgroup_size{};
+ u32 shared_memory_size_in_words = 0;
+ u32 local_memory_size_in_words = 0;
+};
+static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
+
+struct SerializedRegistryInfo {
+ VideoCore::GuestDriverProfile guest_driver_profile;
+ u32 bound_buffer = 0;
+ GraphicsInfo graphics;
+ ComputeInfo compute;
+};
+
+/**
+ * The Registry is a class use to interface the 3D and compute engines with the shader compiler.
+ * With it, the shader can obtain required data from GPU state and store it for disk shader
+ * compilation.
+ */
+class Registry {
+public:
+ explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
+
+ explicit Registry(Tegra::Engines::ShaderType shader_stage,
+ Tegra::Engines::ConstBufferEngineInterface& engine);
+
+ ~Registry();
+
+ /// Retrieves a key from the registry, if it's registered, it will give the registered value, if
+ /// not it will obtain it from maxwell3d and register it.
+ std::optional<u32> ObtainKey(u32 buffer, u32 offset);
+
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+
+ std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+
+ /// Inserts a key.
+ void InsertKey(u32 buffer, u32 offset, u32 value);
+
+ /// Inserts a bound sampler key.
+ void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Inserts a bindless sampler key.
+ void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+ /// Checks keys and samplers against engine's current const buffers.
+ /// Returns true if they are the same value, false otherwise.
+ bool IsConsistent() const;
+
+ /// Returns true if the keys are equal to the other ones in the registry.
+ bool HasEqualKeys(const Registry& rhs) const;
+
+ /// Returns graphics information from this shader
+ const GraphicsInfo& GetGraphicsInfo() const;
+
+ /// Returns compute information from this shader
+ const ComputeInfo& GetComputeInfo() const;
+
+ /// Gives an getter to the const buffer keys in the database.
+ const KeyMap& GetKeys() const {
+ return keys;
+ }
+
+ /// Gets samplers database.
+ const BoundSamplerMap& GetBoundSamplers() const {
+ return bound_samplers;
+ }
+
+ /// Gets bindless samplers database.
+ const BindlessSamplerMap& GetBindlessSamplers() const {
+ return bindless_samplers;
+ }
+
+ /// Gets bound buffer used on this shader
+ u32 GetBoundBuffer() const {
+ return bound_buffer;
+ }
+
+ /// Obtains access to the guest driver's profile.
+ VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
+ return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
+ }
+
+private:
+ const Tegra::Engines::ShaderType stage;
+ VideoCore::GuestDriverProfile stored_guest_driver_profile;
+ Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
+ KeyMap keys;
+ BoundSamplerMap bound_samplers;
+ BindlessSamplerMap bindless_samplers;
+ u32 bound_buffer;
+ GraphicsInfo graphics_info;
+ ComputeInfo compute_info;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 3a5d280a9..425927777 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -11,6 +11,7 @@
#include "common/logging/log.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/shader/node_helper.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- ConstBufferLocker& locker)
- : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
+ Registry& registry)
+ : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
Decode();
PostDecode();
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index b0851c3be..dde036b40 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,8 +18,8 @@
#include "video_core/engines/shader_header.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/compiler_settings.h"
-#include "video_core/shader/const_buffer_locker.h"
#include "video_core/shader/node.h"
+#include "video_core/shader/registry.h"
namespace VideoCommon::Shader {
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
- ConstBufferLocker& locker);
+ Registry& registry);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -414,7 +414,7 @@ private:
const ProgramCode& program_code;
const u32 main_offset;
const CompilerSettings settings;
- ConstBufferLocker& locker;
+ Registry& registry;
bool decompiled{};
bool disable_flow_stack{};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 15e22b9fa..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
return {tracked, track};
} else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
- auto bound_buffer = locker.ObtainBoundBuffer();
- if (!bound_buffer) {
+ const u32 bound_buffer = registry.GetBoundBuffer();
+ if (bound_buffer != cbuf->GetIndex()) {
return {};
}
- if (*bound_buffer != cbuf->GetIndex()) {
- return {};
- }
- auto pair = DecoupleIndirectRead(*operation);
+ const auto pair = DecoupleIndirectRead(*operation);
if (!pair) {
return {};
}
auto [gpr, base_offset] = *pair;
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
- auto gpu_driver = locker.AccessGuestDriverProfile();
- if (gpu_driver == nullptr) {
- return {};
- }
+ const auto& gpu_driver = registry.AccessGuestDriverProfile();
const u32 bindless_cv = NewCustomVariable();
- const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
- Immediate(gpu_driver->GetTextureHandlerSize()));
+ const Node op =
+ Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
const Node cv_node = GetCustomVariable(bindless_cv);
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <unordered_map>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/registry.h"
+#include "video_core/shader/transform_feedback.h"
+
+namespace VideoCommon::Shader {
+
+namespace {
+
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
+
+/// Attribute offsets that describe a vector
+constexpr std::array VECTORS = {
+ 28, // gl_Position
+ 32, // Generic 0
+ 36, // Generic 1
+ 40, // Generic 2
+ 44, // Generic 3
+ 48, // Generic 4
+ 52, // Generic 5
+ 56, // Generic 6
+ 60, // Generic 7
+ 64, // Generic 8
+ 68, // Generic 9
+ 72, // Generic 10
+ 76, // Generic 11
+ 80, // Generic 12
+ 84, // Generic 13
+ 88, // Generic 14
+ 92, // Generic 15
+ 96, // Generic 16
+ 100, // Generic 17
+ 104, // Generic 18
+ 108, // Generic 19
+ 112, // Generic 20
+ 116, // Generic 21
+ 120, // Generic 22
+ 124, // Generic 23
+ 128, // Generic 24
+ 132, // Generic 25
+ 136, // Generic 26
+ 140, // Generic 27
+ 144, // Generic 28
+ 148, // Generic 29
+ 152, // Generic 30
+ 156, // Generic 31
+ 160, // gl_FrontColor
+ 164, // gl_FrontSecondaryColor
+ 160, // gl_BackColor
+ 164, // gl_BackSecondaryColor
+ 192, // gl_TexCoord[0]
+ 196, // gl_TexCoord[1]
+ 200, // gl_TexCoord[2]
+ 204, // gl_TexCoord[3]
+ 208, // gl_TexCoord[4]
+ 212, // gl_TexCoord[5]
+ 216, // gl_TexCoord[6]
+ 220, // gl_TexCoord[7]
+};
+} // namespace
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
+
+ std::unordered_map<u8, VaryingTFB> tfb;
+
+ for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
+ const auto& locations = info.tfb_varying_locs[buffer];
+ const auto& layout = info.tfb_layouts[buffer];
+ const std::size_t varying_count = layout.varying_count;
+
+ std::size_t highest = 0;
+
+ for (std::size_t offset = 0; offset < varying_count; ++offset) {
+ const std::size_t base_offset = offset;
+ const u8 location = locations[offset];
+
+ VaryingTFB varying;
+ varying.buffer = layout.stream;
+ varying.stride = layout.stride;
+ varying.offset = offset * sizeof(u32);
+ varying.components = 1;
+
+ if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
+ UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
+
+ const u8 base_index = location / 4;
+ while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
+ ++offset;
+ ++varying.components;
+ }
+ }
+
+ [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
+ UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
+
+ highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
+ }
+
+ UNIMPLEMENTED_IF(highest != layout.stride);
+ }
+ return tfb;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+
+#include "common/common_types.h"
+#include "video_core/shader/registry.h"
+
+namespace VideoCommon::Shader {
+
+struct VaryingTFB {
+ std::size_t buffer;
+ std::size_t stride;
+ std::size_t offset;
+ std::size_t components;
+};
+
+std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
+
+} // namespace VideoCommon::Shader