summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2019-09-23 21:40:58 +0200
committerFernandoS27 <fsahmkow27@gmail.com>2019-10-25 15:01:29 +0200
commitacd64411342e70bd7e9f7156f62c3b1a609ac3c4 (patch)
treec05f05bb52fdee4fb0c693c98dec5915b69b302d
parentVideoCore: Unify const buffer accessing along engines and provide ConstBufferLocker class to shaders. (diff)
downloadyuzu-acd64411342e70bd7e9f7156f62c3b1a609ac3c4.tar
yuzu-acd64411342e70bd7e9f7156f62c3b1a609ac3c4.tar.gz
yuzu-acd64411342e70bd7e9f7156f62c3b1a609ac3c4.tar.bz2
yuzu-acd64411342e70bd7e9f7156f62c3b1a609ac3c4.tar.lz
yuzu-acd64411342e70bd7e9f7156f62c3b1a609ac3c4.tar.xz
yuzu-acd64411342e70bd7e9f7156f62c3b1a609ac3c4.tar.zst
yuzu-acd64411342e70bd7e9f7156f62c3b1a609ac3c4.zip
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp45
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp26
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h13
-rw-r--r--src/video_core/shader/control_flow.cpp22
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp2
-rw-r--r--src/video_core/shader/shader_ir.cpp4
-rw-r--r--src/video_core/shader/shader_ir.h3
10 files changed, 82 insertions, 43 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 10114909b..9431d64ac 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1006,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
}
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
- tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second);
+ tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
+ cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset());
}();
@@ -1051,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
}
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
- tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second);
+ tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
+ cbuf.first, cbuf.second);
return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
}();
SetupImage(bindpoint, tic, entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 42ca3b1bd..9e2799876 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -10,6 +10,7 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
+#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -173,8 +174,9 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
}
/// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
- ProgramCode program_code, ProgramCode program_code_b) {
+GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device,
+ ProgramType program_type, ProgramCode program_code,
+ ProgramCode program_code_b) {
GLShader::ShaderSetup setup(program_code);
setup.program.size_a = CalculateProgramSize(program_code);
setup.program.size_b = 0;
@@ -190,14 +192,25 @@ GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_
switch (program_type) {
case ProgramType::VertexA:
- case ProgramType::VertexB:
- return GLShader::GenerateVertexShader(device, setup);
- case ProgramType::Geometry:
- return GLShader::GenerateGeometryShader(device, setup);
- case ProgramType::Fragment:
- return GLShader::GenerateFragmentShader(device, setup);
- case ProgramType::Compute:
- return GLShader::GenerateComputeShader(device, setup);
+ case ProgramType::VertexB: {
+ VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex,
+ &(system.GPU().Maxwell3D())};
+ return GLShader::GenerateVertexShader(locker, device, setup);
+ }
+ case ProgramType::Geometry: {
+ VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry,
+ &(system.GPU().Maxwell3D())};
+ return GLShader::GenerateGeometryShader(locker, device, setup);
+ }
+ case ProgramType::Fragment: {
+ VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment,
+ &(system.GPU().Maxwell3D())};
+ return GLShader::GenerateFragmentShader(locker, device, setup);
+ }
+ case ProgramType::Compute: {
+ VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute, &(system.GPU().KeplerCompute())};
+ return GLShader::GenerateComputeShader(locker, device, setup);
+ }
default:
UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
return {};
@@ -307,8 +320,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
ProgramCode&& program_code_b) {
const auto code_size{CalculateProgramSize(program_code)};
const auto code_size_b{CalculateProgramSize(program_code_b)};
- auto result{
- CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
+ auto result{CreateProgram(params.system, params.device, GetProgramType(program_type),
+ program_code, program_code_b)};
if (result.first.empty()) {
// TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
return {};
@@ -331,7 +344,7 @@ Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
- auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
+ auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})};
const auto code_size{CalculateProgramSize(code)};
params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
@@ -566,7 +579,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
result = {stored_decompiled.code, stored_decompiled.entries};
} else {
// Otherwise decompile the shader at boot and save the result to the decompiled file
- result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
+ result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(),
raw.GetProgramCodeB());
disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
}
@@ -612,7 +625,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier =
GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
- const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
+ const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier);
@@ -639,7 +652,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
- const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
+ const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
host_ptr, unique_identifier};
const auto found = precompiled_shaders.find(unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index de195cc5d..6ff78f005 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -45,6 +45,7 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
struct ShaderParameters {
ShaderDiskCacheOpenGL& disk_cache;
const PrecompiledPrograms& precompiled_programs;
+ Core::System& system;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index b5a43e79e..817c6e12c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -21,7 +21,8 @@ static constexpr u32 COMPUTE_OFFSET = 0;
static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: VS" + id + "\n\n";
@@ -35,14 +36,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
)";
- const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
+ locker);
const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
ProgramResult program = Decompile(device, program_ir, stage, "vertex");
out += program.first;
if (setup.IsDualProgram()) {
const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
- settings);
+ settings, locker);
ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
out += program_b.first;
}
@@ -71,7 +73,8 @@ void main() {
return {std::move(out), std::move(program.second)};
}
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: GS" + id + "\n\n";
@@ -85,7 +88,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
)";
- const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
+ locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
out += program.first;
@@ -97,7 +101,8 @@ void main() {
return {std::move(out), std::move(program.second)};
}
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: FS" + id + "\n\n";
@@ -120,7 +125,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
)";
- const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
+ const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
+ locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
out += program.first;
@@ -133,13 +139,15 @@ void main() {
return {std::move(out), std::move(program.second)};
}
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup) {
const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
std::string out = "// Shader Unique Id: CS" + id + "\n\n";
out += GetCommonDeclarations();
- const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings);
+ const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings,
+ locker);
ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
out += program.first;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3833e88ab..05f157298 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -16,6 +16,7 @@ class Device;
namespace OpenGL::GLShader {
+using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
struct ShaderSetup {
@@ -46,15 +47,19 @@ private:
};
/// Generates the GLSL vertex shader program source code for the given VS program
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup);
/// Generates the GLSL geometry shader program source code for the given GS program
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup);
/// Generates the GLSL fragment shader program source code for the given FS program
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup);
/// Generates the GLSL compute shader program source code for the given CS program
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
+ const ShaderSetup& setup);
} // namespace OpenGL::GLShader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 70f758642..dac2e4272 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -57,8 +57,8 @@ struct BlockInfo {
struct CFGRebuildState {
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
- const u32 start)
- : start{start}, program_code{program_code}, program_size{program_size} {}
+ const u32 start, ConstBufferLocker& locker)
+ : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {}
u32 start{};
std::vector<BlockInfo> block_info{};
@@ -72,6 +72,7 @@ struct CFGRebuildState {
const ProgramCode& program_code;
const std::size_t program_size;
ASTManager* manager;
+ ConstBufferLocker& locker;
};
enum class BlockCollision : u32 { None, Found, Inside };
@@ -214,7 +215,7 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState&
if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
if (instr.gpr0.Value() == track_register) {
track_register = instr.gpr8.Value();
- result.entries = instr.alu.GetSignedImm20_20();
+ result.entries = instr.alu.GetSignedImm20_20() + 1;
pos--;
found_track = true;
break;
@@ -406,8 +407,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
auto tmp = TrackBranchIndirectInfo(state, address, offset);
if (tmp) {
auto result = *tmp;
- LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}",
- result.buffer, result.offset, result.entries);
+ std::string entries{}; // values returned by ObtainKey, one per line, for the log below
+ for (u32 i = 0; i < result.entries; i++) {
+ auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+ entries = entries + std::to_string(*k) + '\n';
+ }
+ LOG_CRITICAL(HW_GPU,
+ "Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}",
+ result.buffer, result.offset, result.entries, entries);
} else {
LOG_CRITICAL(HW_GPU, "Track Unsuccesful");
}
@@ -588,14 +595,15 @@ void DecompileShader(CFGRebuildState& state) {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address,
- const CompilerSettings& settings) {
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker) {
auto result_out = std::make_unique<ShaderCharacteristics>();
if (settings.depth == CompileDepth::BruteForce) {
result_out->settings.depth = CompileDepth::BruteForce;
return result_out;
}
- CFGRebuildState state{program_code, program_size, start_address};
+ CFGRebuildState state{program_code, program_size, start_address, locker};
// Inspect Code and generate blocks
state.labels.clear();
state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 37e987d62..6d0e50d7c 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -78,6 +78,7 @@ struct ShaderCharacteristics {
std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
std::size_t program_size, u32 start_address,
- const CompilerSettings& settings);
+ const CompilerSettings& settings,
+ ConstBufferLocker& locker);
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2626b1616..3f87b87ca 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
decompiled = false;
- auto info = ScanFlow(program_code, program_size, main_offset, settings);
+ auto info = ScanFlow(program_code, program_size, main_offset, settings, locker);
auto& shader_info = *info;
coverage_begin = shader_info.start;
coverage_end = shader_info.end;
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index c1f2b88c8..6430575ec 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -23,9 +23,9 @@ using Tegra::Shader::PredOperation;
using Tegra::Shader::Register;
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size,
- CompilerSettings settings)
+ CompilerSettings settings, ConstBufferLocker& locker)
: program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{},
- program_manager{true, true}, settings{settings} {
+ program_manager{true, true}, settings{settings}, locker{locker} {
Decode();
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 68818643c..e3b568d3e 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -68,7 +68,7 @@ struct GlobalMemoryUsage {
class ShaderIR final {
public:
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size,
- CompilerSettings settings);
+ CompilerSettings settings, ConstBufferLocker& locker);
~ShaderIR();
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -389,6 +389,7 @@ private:
NodeBlock global_code;
ASTManager program_manager;
CompilerSettings settings{};
+ ConstBufferLocker& locker;
std::set<u32> used_registers;
std::set<Tegra::Shader::Pred> used_predicates;