From 15a0e1481d9a1efb3e3aa61cbaf2fa1ba0392d71 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 19:09:21 -0300
Subject: shader_ir: Initial implementation

---
 src/video_core/shader/decode/arithmetic.cpp        | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/arithmetic_half.cpp   | 24 ++++++++++++++++++++++
 .../shader/decode/arithmetic_half_immediate.cpp    | 24 ++++++++++++++++++++++
 .../shader/decode/arithmetic_immediate.cpp         | 24 ++++++++++++++++++++++
 .../shader/decode/arithmetic_integer.cpp           | 24 ++++++++++++++++++++++
 .../shader/decode/arithmetic_integer_immediate.cpp | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/bfe.cpp               | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/bfi.cpp               | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/conversion.cpp        | 24 ++++++++++++++++++++++
 .../shader/decode/decode_integer_set.cpp           |  0
 src/video_core/shader/decode/ffma.cpp              | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/float_set.cpp         | 24 ++++++++++++++++++++++
 .../shader/decode/float_set_predicate.cpp          | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/half_set.cpp          | 24 ++++++++++++++++++++++
 .../shader/decode/half_set_predicate.cpp           | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/hfma2.cpp             | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/integer_set.cpp       | 24 ++++++++++++++++++++++
 .../shader/decode/integer_set_predicate.cpp        | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/memory.cpp            | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/other.cpp             | 24 ++++++++++++++++++++++
 .../shader/decode/predicate_set_predicate.cpp      | 24 ++++++++++++++++++++++
 .../shader/decode/predicate_set_register.cpp       | 24 ++++++++++++++++++++++
 .../shader/decode/register_set_predicate.cpp       | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/shift.cpp             | 24 ++++++++++++++++++++++
 src/video_core/shader/decode/xmad.cpp              | 24 ++++++++++++++++++++++
 25 files changed, 576 insertions(+)
 create mode 100644 src/video_core/shader/decode/arithmetic.cpp
 create mode 100644 src/video_core/shader/decode/arithmetic_half.cpp
 create mode 100644 src/video_core/shader/decode/arithmetic_half_immediate.cpp
 create mode 100644 src/video_core/shader/decode/arithmetic_immediate.cpp
 create mode 100644 src/video_core/shader/decode/arithmetic_integer.cpp
 create mode 100644 src/video_core/shader/decode/arithmetic_integer_immediate.cpp
 create mode 100644 src/video_core/shader/decode/bfe.cpp
 create mode 100644 src/video_core/shader/decode/bfi.cpp
 create mode 100644 src/video_core/shader/decode/conversion.cpp
 create mode 100644 src/video_core/shader/decode/decode_integer_set.cpp
 create mode 100644 src/video_core/shader/decode/ffma.cpp
 create mode 100644 src/video_core/shader/decode/float_set.cpp
 create mode 100644 src/video_core/shader/decode/float_set_predicate.cpp
 create mode 100644 src/video_core/shader/decode/half_set.cpp
 create mode 100644 src/video_core/shader/decode/half_set_predicate.cpp
 create mode 100644 src/video_core/shader/decode/hfma2.cpp
 create mode 100644 src/video_core/shader/decode/integer_set.cpp
 create mode 100644 src/video_core/shader/decode/integer_set_predicate.cpp
 create mode 100644 src/video_core/shader/decode/memory.cpp
 create mode 100644 src/video_core/shader/decode/other.cpp
 create mode 100644 src/video_core/shader/decode/predicate_set_predicate.cpp
 create mode 100644 src/video_core/shader/decode/predicate_set_register.cpp
 create mode 100644 src/video_core/shader/decode/register_set_predicate.cpp
 create mode 100644 src/video_core/shader/decode/shift.cpp
 create mode 100644 src/video_core/shader/decode/xmad.cpp

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
new file mode 100644
index 000000000..9242a7389
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
new file mode 100644
index 000000000..3b189b0d1
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
new file mode 100644
index 000000000..8d8a2dad9
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
new file mode 100644
index 000000000..18fd2082e
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
new file mode 100644
index 000000000..12c64e97a
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
new file mode 100644
index 000000000..46f340235
--- /dev/null
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
new file mode 100644
index 000000000..ffd904c54
--- /dev/null
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
new file mode 100644
index 000000000..b94d46ce6
--- /dev/null
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
new file mode 100644
index 000000000..c6eb2952c
--- /dev/null
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
new file mode 100644
index 000000000..2044113f0
--- /dev/null
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
new file mode 100644
index 000000000..17d47c17a
--- /dev/null
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
new file mode 100644
index 000000000..1dbe34353
--- /dev/null
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
new file mode 100644
index 000000000..af363d5d2
--- /dev/null
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
new file mode 100644
index 000000000..5fe123ea5
--- /dev/null
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
new file mode 100644
index 000000000..5ce08481e
--- /dev/null
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
new file mode 100644
index 000000000..316a7d8ad
--- /dev/null
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
new file mode 100644
index 000000000..10975c394
--- /dev/null
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
new file mode 100644
index 000000000..d6086004b
--- /dev/null
+++ b/src/video_core/shader/decode/memory.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
new file mode 100644
index 000000000..d84702a4f
--- /dev/null
+++ b/src/video_core/shader/decode/other.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
new file mode 100644
index 000000000..1ad853fda
--- /dev/null
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
new file mode 100644
index 000000000..67a06b5b4
--- /dev/null
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
new file mode 100644
index 000000000..29a348cf5
--- /dev/null
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
new file mode 100644
index 000000000..41f5b8cb0
--- /dev/null
+++ b/src/video_core/shader/decode/shift.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
new file mode 100644
index 000000000..27a2fc05d
--- /dev/null
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+
+u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+
+    UNIMPLEMENTED();
+
+    return pc;
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
-- 
cgit v1.2.3


From 4c70d5b8eb68a61f5504a05dd597ecb2b04441b5 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:53:50 -0300
Subject: shader_decode: Implement MOV_C and MOV_R

---
 src/video_core/shader/decode/arithmetic.cpp | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 9242a7389..c297f729e 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -11,12 +11,34 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::SubOp;
 
 u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    Node op_a = GetRegister(instr.gpr8);
+
+    Node op_b = [&]() -> Node {
+        if (instr.is_b_imm) {
+            return GetImmediate19(instr);
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::MOV_C:
+    case OpCode::Id::MOV_R: {
+        // MOV has neither 'abs' nor 'neg' bits, so operand B is copied unmodified.
+        SetRegister(bb, instr.gpr0, op_b);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 7c192ec43fb6a08baea5d55aa47fcf3fa98d4343 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:54:47 -0300
Subject: shader_decode: Implement FMUL_C, FMUL_R and FMUL_IMM

---
 src/video_core/shader/decode/arithmetic.cpp | 42 +++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index c297f729e..78bca79e3 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -36,6 +36,48 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, op_b);
         break;
     }
+    case OpCode::Id::FMUL_C:
+    case OpCode::Id::FMUL_R:
+    case OpCode::Id::FMUL_IMM: {
+        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
+        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented",
+                             instr.fmul.tab5cb8_2.Value());
+        // SMO typically sends 1 here, which seems to be the default.
+        UNIMPLEMENTED_IF_MSG(instr.fmul.tab5c68_0 != 1, "FMUL tab5c68_0({}) is not implemented",
+                             instr.fmul.tab5c68_0.Value());
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in FMUL is not implemented");
+
+        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
+
+        // TODO(Rodrigo): Should precise be used when there's a postfactor?
+        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+
+        if (instr.fmul.postfactor != 0) {
+            auto postfactor = static_cast<s32>(instr.fmul.postfactor);
+
+            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
+            // logic.
+            if (postfactor >= 4) {
+                postfactor = 7 - postfactor;
+            } else {
+                postfactor = 0 - postfactor;
+            }
+
+            if (postfactor > 0) {
+                value = Operation(OperationCode::FMul, NO_PRECISE, value,
+                                  Immediate(static_cast<f32>(1 << postfactor)));
+            } else {
+                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
+                                  Immediate(static_cast<f32>(1 << -postfactor)));
+            }
+        }
+
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 4ccaa1402d376af14d8527c0a0bcc77be007bd3c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:55:19 -0300
Subject: shader_decode: Implement FADD_C, FADD_R and FADD_IMM

---
 src/video_core/shader/decode/arithmetic.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 78bca79e3..d196d94b5 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -78,6 +78,21 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::FADD_C:
+    case OpCode::Id::FADD_R:
+    case OpCode::Id::FADD_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in FADD is not implemented");
+
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+
+        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 964ddeeb90b655d8b5558002db7c780c0394263c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:56:21 -0300
Subject: shader_decode: Implement MUFU

---
 src/video_core/shader/decode/arithmetic.cpp | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index d196d94b5..fb688c324 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -93,6 +93,35 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::MUFU: {
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        // Pick the operation node for instr.sub_op; every path out of the lambda returns a Node.
+        Node value = [&]() {
+            switch (instr.sub_op) {
+            case SubOp::Cos:
+                return Operation(OperationCode::FCos, PRECISE, op_a);
+            case SubOp::Sin:
+                return Operation(OperationCode::FSin, PRECISE, op_a);
+            case SubOp::Ex2:
+                return Operation(OperationCode::FExp2, PRECISE, op_a);
+            case SubOp::Lg2:
+                return Operation(OperationCode::FLog2, PRECISE, op_a);
+            case SubOp::Rcp:
+                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
+            case SubOp::Rsq:
+                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
+            case SubOp::Sqrt:
+                return Operation(OperationCode::FSqrt, PRECISE, op_a);
+            default:
+                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
+                                  static_cast<unsigned>(instr.sub_op.Value()));
+                return Immediate(0.0f); // Placeholder so control never falls off the lambda (UB)
+            }
+        }();
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 5e6a0a08c14df8e1993f4f72b1bbfd388a5ea48e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:56:45 -0300
Subject: shader_decode: Implement FMNMX_C, FMNMX_R and FMNMX_IMM

---
 src/video_core/shader/decode/arithmetic.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index fb688c324..0b6654397 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -122,6 +122,24 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::FMNMX_C:
+    case OpCode::Id::FMNMX_R:
+    case OpCode::Id::FMNMX_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in FMNMX is not implemented");
+
+        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+        // The fmnmx predicate (with its negate bit) is the Select condition between min and max.
+        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
+        // Both candidates are built up front; Select receives them in (min, max) order.
+        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
+        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
+
+        SetRegister(bb, instr.gpr0,
+                    Operation(OperationCode::Select, NO_PRECISE, condition, min, max));
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 06cb910c6d9b0be664db4305f90974198f84ae98 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:57:09 -0300
Subject: shader_decode: Stub RRO_C, RRO_R and RRO_IMM

---
 src/video_core/shader/decode/arithmetic.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 0b6654397..9f8c27b3e 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -140,6 +140,15 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
                     Operation(OperationCode::Select, NO_PRECISE, condition, min, max));
         break;
     }
+    case OpCode::Id::RRO_C:
+    case OpCode::Id::RRO_R:
+    case OpCode::Id::RRO_IMM: {
+        // Stub: RRO is currently decoded as a plain move of op_b (after abs/negate) into gpr0.
+        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
+        SetRegister(bb, instr.gpr0, op_b);
+        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 2edee801ce003f3a097cbbdbaf1b9bbb4bcddbc4 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:58:23 -0300
Subject: shader_decode: Implement MOV32_IMM

---
 src/video_core/shader/decode/arithmetic_immediate.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 18fd2082e..2d385f48a 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -16,7 +16,15 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::MOV32_IMM: { // gpr0 receives GetImmediate32(instr) unchanged
+        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
+                          opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From c9b2a1b051fe386fa33427b527ca626ad3fdbfaf Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:58:48 -0300
Subject: shader_decode: Implement FMUL32_IMM

---
 src/video_core/shader/decode/arithmetic_immediate.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 2d385f48a..0e4cbccab 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -21,6 +21,16 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, GetImmediate32(instr));
         break;
     }
+    case OpCode::Id::FMUL32_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
+                             "Condition codes generation in FMUL32 is not implemented");
+        Node value =
+            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
+        value = GetSaturatedFloat(value, instr.fmul32.saturate);
+        // gpr0 = gpr8 * imm32, after GetSaturatedFloat with the fmul32 saturate flag.
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
                           opcode->get().GetName());
-- 
cgit v1.2.3


From ea358bd4bf70b6b93b4022ede7a8bcd111f10f9e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 20 Dec 2018 23:59:01 -0300
Subject: shader_decode: Implement FADD32I

---
 src/video_core/shader/decode/arithmetic_immediate.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 0e4cbccab..996b2537a 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -31,6 +31,18 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::FADD32I: {
+        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
+                             "Condition codes generation in FADD32I is not implemented");
+        const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
+                                                instr.fadd32i.negate_a);
+        const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
+                                                instr.fadd32i.negate_b);
+        // The sum is written to gpr0 directly; no GetSaturatedFloat step is applied here.
+        const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
                           opcode->get().GetName());
-- 
cgit v1.2.3


From e3f1233ce13d82623173d690a6aa7819d68f069e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 00:05:42 -0300
Subject: shader_decode: Implement LD_A

---
 src/video_core/shader/decode/memory.cpp | 40 ++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index d6086004b..30e2b33a3 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -9,14 +9,52 @@
 
 namespace VideoCommon::Shader {
 
+using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
 
 u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::LD_A: {
+        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
+        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
+                             "Indirect attribute loads are not supported");
+        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
+                             "Unaligned attribute loads are not supported");
+        // Loads always request perspective interpolation with the default sample mode.
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
+                                          Tegra::Shader::IpaSampleMode::Default};
+
+        u64 next_element = instr.attribute.fmt20.element;
+        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+        // Reads one element into gpr0 + reg_offset, then advances next_element/next_index.
+        const auto LoadNextElement = [&](u32 reg_offset) {
+            const Node buffer = GetRegister(instr.gpr39);
+            const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index),
+                                                     next_element, input_mode, buffer);
+
+            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
+
+            // Load the next attribute element into the following register. If the element
+            // to load goes beyond the vec4 size, load the first element of the next
+            // attribute.
+            next_element = (next_element + 1) % 4;
+            next_index = next_index + (next_element == 0 ? 1 : 0);
+        };
+        // The size field stores the word count minus one.
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            LoadNextElement(reg_offset);
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 0c049e0a2106ff1624dfe4dcbfe8703584863c7c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 00:06:13 -0300
Subject: shader_decode: Implement ST_A

---
 src/video_core/shader/decode/memory.cpp | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 30e2b33a3..aea1a0675 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -52,6 +52,36 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::ST_A: {
+        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
+                             "Indirect attribute stores are not supported");
+        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
+                             "Unaligned attribute stores are not supported");
+
+        u64 next_element = instr.attribute.fmt20.element;
+        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+        // Assigns gpr0 + reg_offset to the current output element, then advances the cursor.
+        const auto StoreNextElement = [&](u32 reg_offset) {
+            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
+                                                 next_element, GetRegister(instr.gpr39));
+            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
+
+            bb.push_back(Operation(OperationCode::Assign, dest, src));
+
+            // Store the following register into the next attribute element. If the element
+            // to store goes beyond the vec4 size, store into the first element of the next
+            // attribute.
+            next_element = (next_element + 1) % 4;
+            next_index = next_index + (next_element == 0 ? 1 : 0);
+        };
+        // The size field stores the word count minus one.
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
+            StoreNextElement(reg_offset);
+        }
+
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From cacb934f21d5b2abcbae168f8916bf3e3a21d64b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 00:07:32 -0300
Subject: shader_decode: Implement EXIT

---
 src/video_core/shader/decode/other.cpp | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d84702a4f..2a5b70b8b 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -16,7 +16,38 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::EXIT: {
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
+                             static_cast<u32>(cc));
+        // Only FlowCondition::Always emits an Exit node; any other condition is just reported.
+        switch (instr.flow.cond) {
+        case Tegra::Shader::FlowCondition::Always:
+            bb.push_back(Operation(OperationCode::Exit));
+            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
+                // If this is an unconditional exit then just end processing here,
+                // otherwise we have to account for the possibility of the condition
+                // not being met, so continue processing the next instruction.
+                pc = MAX_PROGRAM_LENGTH - 1;
+            }
+            break;
+
+        case Tegra::Shader::FlowCondition::Fcsm_Tr:
+            // TODO(bunnei): What is this used for? If we assume this condition is not
+            // satisfied, dual vertex shaders in Farming Simulator make more sense
+            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
+            break;
+
+        default:
+            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
+                              static_cast<u32>(instr.flow.cond.Value()));
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 4f95dc950ee483ac9d19fb98209abeb26556e26f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 00:08:21 -0300
Subject: shader_decode: Implement IPA

---
 src/video_core/shader/decode/other.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 2a5b70b8b..ffdc77d90 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -45,6 +45,18 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::IPA: {
+        const auto& attribute = instr.attribute.fmt28;
+        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
+                                                instr.ipa.sample_mode.Value()};
+        // Read the attribute with that mode, wrap it in an Ipa node and apply GetSaturatedFloat.
+        const Node input_attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
+        const Node ipa = Operation(OperationCode::Ipa, input_attr);
+        const Node value = GetSaturatedFloat(ipa, instr.ipa.saturate);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 8215ae942c72ec20c2ebebbf8fc5784e3f21bb3c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 00:11:33 -0300
Subject: shader_decode: Partially implement BRA

---
 src/video_core/shader/decode/other.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index ffdc77d90..3f058324c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -11,6 +11,7 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::ConditionCode;
 
 u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
@@ -45,6 +46,17 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::BRA: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "BRA with constant buffers are not implemented");
+
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF(cc != Tegra::Shader::ConditionCode::T);
+        // The emitted immediate is the current pc plus instr.bra.GetBranchTarget().
+        const u32 target = pc + instr.bra.GetBranchTarget();
+        bb.push_back(Operation(OperationCode::Bra, Immediate(target)));
+        break;
+    }
     case OpCode::Id::IPA: {
         const auto& attribute = instr.attribute.fmt28;
         const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
-- 
cgit v1.2.3


From c703f0aee41ba08219f10217169813ac97da06c2 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 00:12:48 -0300
Subject: shader_decode: Implement FSETP

---
 .../shader/decode/float_set_predicate.cpp          | 34 +++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 1dbe34353..5dd085fea 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -11,12 +11,44 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
 
 u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
+                                            instr.fsetp.neg_a != 0);
+    Node op_b = [&]() {
+        if (instr.is_b_imm) {
+            return GetImmediate19(instr);
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        }
+    }();
+    op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); // abs only; negate is off
+
+    // We can't use the constant predicate as destination.
+    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+    // Compare op_a with op_b, then combine the result with pred39 using the fsetp.op combiner.
+    const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
+    const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
+    const Node value = Operation(combiner, predicate, second_pred);
+
+    // Set the primary predicate to the result of Predicate OP SecondPredicate
+    SetPredicate(bb, instr.fsetp.pred3, value);
+
+    if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+        // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+        // if enabled
+        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
+        const Node second_value = Operation(combiner, negated_pred, second_pred);
+        SetPredicate(bb, instr.fsetp.pred0, second_value);
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 878672f371e71d7d7a5b44aec0dc4918a682732d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 01:27:47 -0300
Subject: shader_decode: Implement TEXS (F32)

---
 src/video_core/shader/decode/memory.cpp | 199 ++++++++++++++++++++++++++++++++
 1 file changed, 199 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index aea1a0675..1f458b6d7 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <vector>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -13,6 +15,24 @@ using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::TextureMiscMode;
+using Tegra::Shader::TextureProcessMode;
+using Tegra::Shader::TextureType;
+
+static std::size_t GetCoordCount(TextureType texture_type) {
+    switch (texture_type) {
+    case TextureType::Texture1D:
+        return 1;
+    case TextureType::Texture2D:
+        return 2;
+    case TextureType::Texture3D:
+    case TextureType::TextureCube: // 3D and cube lookups both take three coordinates
+        return 3;
+    default: // Unknown types are reported and yield zero coordinates
+        UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
+        return 0;
+    }
+}
 
 u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
@@ -82,6 +102,27 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
         break;
     }
+    case OpCode::Id::TEXS: {
+        Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
+        const bool is_array{instr.texs.IsArrayTexture()};
+        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.texs.GetTextureProcessMode();
+        // NODEP only triggers a warning here; it does not change what is decoded.
+        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
+        }
+
+        const Node texture =
+            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
+        // Only the fp32 destination layout is written; the other path is unimplemented.
+        if (instr.texs.fp32_flag) {
+            WriteTexsInstructionFloat(bb, instr, texture);
+        } else {
+            UNIMPLEMENTED();
+            // WriteTexsInstructionHalfFloat(bb, instr, texture);
+        }
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
     }
@@ -89,4 +130,162 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
     return pc;
 }
 
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
+                                    bool is_array, bool is_shadow) {
+    const auto offset = static_cast<std::size_t>(sampler.index.Value());
+    // Entries are keyed by the offset taken from sampler.index.
+    // If this sampler has already been used, return the existing mapping.
+    const auto itr =
+        std::find_if(used_samplers.begin(), used_samplers.end(),
+                     [&](const Sampler& entry) { return entry.GetOffset() == offset; });
+    if (itr != used_samplers.end()) {
+        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
+               itr->IsShadow() == is_shadow);
+        return *itr;
+    }
+
+    // Otherwise create a new mapping for this sampler; its index is the current entry count.
+    const std::size_t next_index = used_samplers.size();
+    const Sampler entry{offset, next_index, type, is_array, is_shadow};
+    return *used_samplers.emplace(entry).first;
+}
+
+void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
+                                         Node texture) {
+    // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
+    // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
+
+    MetaComponents meta;
+    std::array<Node, 4> dest;
+    // written_components counts enabled components and doubles as the destination slot index.
+    std::size_t written_components = 0;
+    for (u32 component = 0; component < 4; ++component) {
+        if (!instr.texs.IsComponentEnabled(component)) {
+            continue;
+        }
+        meta.components_map[written_components] = static_cast<u32>(component);
+
+        if (written_components < 2) {
+            // Write the first two swizzle components to gpr0 and gpr0+1
+            dest[written_components] = GetRegister(instr.gpr0.Value() + written_components % 2);
+        } else {
+            ASSERT(instr.texs.HasTwoDestinations());
+            // Write the rest of the swizzle components to gpr28 and gpr28+1
+            dest[written_components] = GetRegister(instr.gpr28.Value() + written_components % 2);
+        }
+
+        ++written_components;
+    }
+    // Destination slots that received no component are pointed at RZ.
+    std::generate(dest.begin() + written_components, dest.end(), [&]() { return GetRegister(RZ); });
+
+    bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2],
+                           dest[3]));
+}
+
+Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
+                              TextureProcessMode process_mode, bool depth_compare, bool is_array,
+                              std::size_t bias_offset, std::vector<Node>&& coords) {
+    UNIMPLEMENTED_IF_MSG(
+        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
+            (texture_type == TextureType::TextureCube && is_array && depth_compare),
+        "This method is not supported.");
+    // Sampler entry for instr.sampler; depth_compare is passed as the is_shadow argument.
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
+                            process_mode == TextureProcessMode::LL ||
+                            process_mode == TextureProcessMode::LLA;
+    // Explicit lod is treated as unavailable for 2D array shadow and non-array cube shadow.
+    const bool gl_lod_supported =
+        !((texture_type == TextureType::Texture2D && is_array && depth_compare) ||
+          (texture_type == TextureType::TextureCube && !is_array && depth_compare));
+
+    const OperationCode read_method =
+        lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
+    // The coordinate count is captured before coords is moved into params.
+    const MetaTexture meta{sampler, static_cast<u32>(coords.size())};
+
+    std::vector<Node> params = std::move(coords);
+
+    if (process_mode != TextureProcessMode::None) {
+        if (process_mode == TextureProcessMode::LZ) {
+            if (gl_lod_supported) {
+                params.push_back(Immediate(0));
+            } else {
+                // Lod 0 is emulated by a big negative bias in scenarios that are not supported by
+                // GLSL
+                params.push_back(Immediate(-1000));
+            }
+        } else {
+            // If present, lod or bias are always stored in the register indexed by the gpr20 field
+            // with an offset depending on the usage of the other registers
+            params.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
+        }
+    }
+
+    return Operation(read_method, meta, std::move(params));
+}
+
+Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
+                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+
+    const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
+                                   process_mode != Tegra::Shader::TextureProcessMode::LZ);
+    // Validate against limits of 4 coordinates and 4 input registers.
+    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+    // If enabled arrays index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
+    const u64 coord_register = array_register + (is_array ? 1 : 0);
+    const u64 last_coord_register =
+        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
+            ? static_cast<u64>(instr.gpr20.Value())
+            : coord_register + 1;
+    // With more than one coordinate, the last one is read from last_coord_register.
+    std::vector<Node> coords;
+    for (std::size_t i = 0; i < coord_count; ++i) {
+        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
+        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+    }
+
+    if (depth_compare) {
+        // Depth is always stored in the register signaled by gpr20
+        // or in the next register if lod or bias are used
+        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+        coords.push_back(GetRegister(depth_register));
+    }
+    if (is_array) {
+        coords.push_back(
+            Operation(OperationCode::ICastFloat, NO_PRECISE, GetRegister(array_register)));
+    }
+    // Fill ignored coordinates
+    while (coords.size() < total_coord_count) {
+        coords.push_back(Immediate(0));
+    }
+    // bias_offset is 1 when there are more than two coordinates, otherwise 0.
+    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array,
+                          (coord_count > 2 ? 1 : 0), std::move(coords));
+}
+
+std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
+    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
+    std::size_t max_coords, std::size_t max_inputs) {
+    // Returns {coord_count for the texture type, total_coord_count after the checks below}.
+    const std::size_t coord_count = GetCoordCount(texture_type);
+    // Array and depth compare each add a coordinate; lod/bias adds one more input register.
+    std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
+    const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
+    if (total_coord_count > max_coords || total_reg_count > max_inputs) {
+        UNIMPLEMENTED_MSG("Unsupported Texture operation");
+        total_coord_count = std::min(total_coord_count, max_coords);
+    }
+    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
+    total_coord_count +=
+        (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
+
+    return {coord_count, total_coord_count};
+}
+
 } // namespace VideoCommon::Shader
\ No newline at end of file
-- 
cgit v1.2.3


From 2b90637f4bb0358525715c113f903d0c069b0eb4 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 13 Dec 2018 16:59:28 -0300
Subject: shader_decode: Implement TEX and TXQ

---
 src/video_core/shader/decode/memory.cpp | 219 ++++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 1f458b6d7..220238ce8 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <vector>
 
 #include "common/assert.h"
@@ -102,6 +103,44 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
         break;
     }
+    case OpCode::Id::TEX: {
+        Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
+        const bool is_array = instr.tex.array != 0;
+        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex.GetTextureProcessMode();
+        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+
+        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TEX.NODEP is not implemented");
+        }
+
+        const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array);
+
+        if (depth_compare) {
+            SetRegister(bb, instr.gpr0, texture);
+        } else {
+            MetaComponents meta;
+            std::array<Node, 4> dest;
+
+            std::size_t dest_elem = 0;
+            for (std::size_t elem = 0; elem < 4; ++elem) {
+                if (!instr.tex.IsComponentEnabled(elem)) {
+                    // Skip disabled components
+                    continue;
+                }
+                meta.components_map[dest_elem] = static_cast<u32>(elem);
+                dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
+
+                ++dest_elem;
+            }
+            std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+
+            bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture,
+                                   dest[0], dest[1], dest[2], dest[3]));
+        }
+        break;
+    }
     case OpCode::Id::TEXS: {
         Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
         const bool is_array{instr.texs.IsArrayTexture()};
@@ -123,6 +162,148 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::TLD4: {
+        ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
+        ASSERT(instr.tld4.array == 0);
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
+                             "NDV is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
+                             "PTP is not implemented");
+
+        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
+        }
+
+        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+        auto texture_type = instr.tld4.texture_type.Value();
+        u32 num_coordinates = static_cast<u32>(GetCoordCount(texture_type));
+        if (depth_compare)
+            num_coordinates += 1;
+
+        std::vector<Node> params;
+
+        switch (num_coordinates) {
+        case 2: {
+            params.push_back(GetRegister(instr.gpr8));
+            params.push_back(GetRegister(instr.gpr8.Value() + 1));
+            break;
+        }
+        case 3: {
+            params.push_back(GetRegister(instr.gpr8));
+            params.push_back(GetRegister(instr.gpr8.Value() + 1));
+            params.push_back(GetRegister(instr.gpr8.Value() + 2));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled coordinates number {}", static_cast<u32>(num_coordinates));
+            params.push_back(GetRegister(instr.gpr8));
+            params.push_back(GetRegister(instr.gpr8.Value() + 1));
+            num_coordinates = 2;
+            texture_type = Tegra::Shader::TextureType::Texture2D;
+        }
+        params.push_back(Immediate(static_cast<u32>(instr.tld4.component)));
+
+        const auto& sampler = GetSampler(instr.sampler, texture_type, false, depth_compare);
+        const MetaTexture meta{sampler, num_coordinates};
+
+        const Node texture =
+            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+
+        if (depth_compare) {
+            SetRegister(bb, instr.gpr0, texture);
+        } else {
+            MetaComponents meta;
+            std::array<Node, 4> dest;
+
+            std::size_t dest_elem = 0;
+            for (std::size_t elem = 0; elem < 4; ++elem) {
+                if (!instr.tex.IsComponentEnabled(elem)) {
+                    // Skip disabled components
+                    continue;
+                }
+                meta.components_map[dest_elem] = static_cast<u32>(elem);
+                dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
+
+                ++dest_elem;
+            }
+            std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+
+            bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture,
+                                   dest[0], dest[1], dest[2], dest[3]));
+        }
+        break;
+    }
+    case OpCode::Id::TLD4S: {
+        UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+
+        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TLD4S.NODEP is not implemented");
+        }
+
+        const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
+        const Node op_a = GetRegister(instr.gpr8);
+        const Node op_b = GetRegister(instr.gpr20);
+
+        std::vector<Node> params;
+
+        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+        if (depth_compare) {
+            // Note: TLD4S coordinate encoding works just like TEXS's
+            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
+            params.push_back(op_a);
+            params.push_back(op_y);
+            params.push_back(op_b);
+        } else {
+            params.push_back(op_a);
+            params.push_back(op_b);
+        }
+        const auto num_coords = static_cast<u32>(params.size());
+        params.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
+
+        const auto& sampler =
+            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+        const MetaTexture meta{sampler, num_coords};
+
+        WriteTexsInstructionFloat(
+            bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params)));
+        break;
+    }
+    case OpCode::Id::TXQ: {
+        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TXQ.NODEP is not implemented");
+        }
+
+        // TODO: The new commits on the texture refactor, change the way samplers work.
+        // Sadly, not all texture instructions specify the type of texture their sampler
+        // uses. This must be fixed at a later instance.
+        const auto& sampler =
+            GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+
+        switch (instr.txq.query_type) {
+        case Tegra::Shader::TextureQueryType::Dimension: {
+            const MetaTexture meta_texture{sampler};
+            const MetaComponents meta_components{{0, 1, 2, 3}};
+
+            const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture,
+                                           GetRegister(instr.gpr8));
+            std::array<Node, 4> dest;
+            for (std::size_t i = 0; i < dest.size(); ++i) {
+                dest[i] = GetRegister(instr.gpr0.Value() + i);
+            }
+
+            bb.push_back(Operation(OperationCode::AssignComposite, meta_components, texture,
+                                   dest[0], dest[1], dest[2], dest[3]));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
+                              static_cast<u32>(instr.txq.query_type.Value()));
+        }
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
     }
@@ -227,6 +408,44 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
     return Operation(read_method, meta, std::move(params));
 }
 
+Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
+                          TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+    // LOD/bias counts as enabled for every process mode except None and LZ
+    const bool lod_bias_enabled = !(process_mode == Tegra::Shader::TextureProcessMode::None ||
+                                    process_mode == Tegra::Shader::TextureProcessMode::LZ);
+    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
+        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+
+    // When arrays are used gpr8 holds the array index and the coordinates start at gpr8 + 1
+    const u64 array_register = instr.gpr8.Value();
+    const u64 coord_register = is_array ? array_register + 1 : array_register;
+
+    std::vector<Node> coords;
+    for (std::size_t offset = 0; offset != coord_count; ++offset) {
+        coords.push_back(GetRegister(coord_register + offset));
+    }
+    const bool is_1d_shadow = depth_compare && !is_array && texture_type == TextureType::Texture1D;
+    if (is_1d_shadow) {
+        // 1D.DC: OpenGL ignores the 2nd component, so push a constant in its place
+        coords.push_back(Immediate(0.0f));
+    }
+    if (depth_compare) {
+        // Depth is read from gpr20, or from the register after it when LOD or bias is used
+        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+        coords.push_back(GetRegister(depth_register));
+    }
+    if (is_array) {
+        coords.push_back(GetRegister(array_register));
+    }
+    // Pad the remaining, ignored coordinates with zero
+    for (std::size_t used = coords.size(); used < total_coord_count; ++used) {
+        coords.push_back(Immediate(0));
+    }
+
+    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0,
+                          std::move(coords));
+}
+
 Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
 
-- 
cgit v1.2.3


From 802c23b8a8f1a14100fb9b291482ae197ba53293 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 13 Dec 2018 18:35:07 -0300
Subject: shader_decode: Implement TMML

---
 src/video_core/shader/decode/memory.cpp | 48 ++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 220238ce8..d8265d3fd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -112,7 +112,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
                              "AOFFI is not implemented");
 
         if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEX.NODEP is not implemented");
+            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
         }
 
         const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array);
@@ -240,7 +240,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
                              "AOFFI is not implemented");
 
         if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLD4S.NODEP is not implemented");
+            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
         }
 
         const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
@@ -273,7 +273,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
     }
     case OpCode::Id::TXQ: {
         if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TXQ.NODEP is not implemented");
+            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
         }
 
         // TODO: The new commits on the texture refactor, change the way samplers work.
@@ -304,6 +304,48 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::TMML: {
+        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
+                             "NDV is not implemented");
+
+        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
+            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
+        }
+
+        auto texture_type = instr.tmml.texture_type.Value();
+        const bool is_array = instr.tmml.array != 0;
+        const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+        std::vector<Node> coords;
+
+        // TODO: Add coordinates for different samplers once other texture types are implemented.
+        switch (texture_type) {
+        case TextureType::Texture1D:
+            coords.push_back(GetRegister(instr.gpr8));
+            break;
+        case TextureType::Texture2D:
+            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
+            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
+
+            // Fallback to interpreting as a 2D texture for now
+            coords.push_back(GetRegister(instr.gpr8.Value() + 0));
+            coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+            texture_type = TextureType::Texture2D;
+        }
+
+        const MetaTexture meta_texture{sampler, static_cast<u32>(coords.size())};
+        const Node texture =
+            Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords));
+
+        const MetaComponents meta_composite{{0, 1, 2, 3}};
+        bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture,
+                               GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1),
+                               GetRegister(RZ), GetRegister(RZ)));
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From abdbafbc203b6dbb8e690d9dda02fc423608401f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 15 Dec 2018 02:07:46 -0300
Subject: shader_decode: Implement PSETP

---
 .../shader/decode/predicate_set_predicate.cpp      | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
index 1ad853fda..24352170d 100644
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -11,12 +11,32 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
 
 u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+    const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+
+    // pred3 is always written, so it must not be the constant predicate (UnusedIndex).
+    ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+    const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
+    const Node predicate = Operation(combiner, op_a, op_b);
+
+    // Primary result: (pred12 OP pred29) OP pred39, with the same combiner used for both steps
+    SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
+
+    if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+        // Secondary result, written only when pred0 is in use: !(pred12 OP pred29) OP pred39
+        SetPredicate(
+            bb, instr.psetp.pred0,
+            Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), second_pred));
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 97f33f00cf4076e49f763c8139d388d47a41c84d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 15 Dec 2018 03:18:25 -0300
Subject: shader_decode: Implement SSY and SYNC

---
 src/video_core/shader/decode/other.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 3f058324c..4c2d24202 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -57,6 +57,25 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         bb.push_back(Operation(OperationCode::Bra, Immediate(target)));
         break;
     }
+    case OpCode::Id::SSY: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "Constant buffer flow is not supported");
+
+        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
+        // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
+        // structure to the BRA opcode.
+        bb.push_back(Operation(OperationCode::Ssy, Immediate(pc + instr.bra.GetBranchTarget())));
+        break;
+    }
+    case OpCode::Id::SYNC: {
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
+                             static_cast<u32>(cc));
+
+        // The SYNC opcode jumps to the address previously set by the SSY opcode
+        bb.push_back(Operation(OperationCode::Sync));
+        break;
+    }
     case OpCode::Id::IPA: {
         const auto& attribute = instr.attribute.fmt28;
         const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
-- 
cgit v1.2.3


From 9118deb9904f5bb4012d32c8ed63262b3f6e74a3 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 15 Dec 2018 17:16:14 -0300
Subject: shader_decode: Stub DEPBAR

---
 src/video_core/shader/decode/other.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 4c2d24202..ef0598d4f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -88,6 +88,10 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::DEPBAR: {
+        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From c849b5b3201e8fda40727c3926b6389f609feafc Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 15 Dec 2018 17:32:51 -0300
Subject: shader_decode: Implement F2F

---
 src/video_core/shader/decode/conversion.cpp | 38 ++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index c6eb2952c..465c63a9e 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -11,12 +11,48 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
 
 u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::F2F_R: {
+        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in F2F is not implemented");
+
+        Node value = GetRegister(instr.gpr20);
+        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
+        // Apply the requested rounding; every path must return, an unhandled mode keeps the input
+        value = [&]() {
+            switch (instr.conversion.f2f.rounding) {
+            case Tegra::Shader::F2fRoundingOp::None:
+                return value;
+            case Tegra::Shader::F2fRoundingOp::Round:
+                return Operation(OperationCode::FRoundEven, PRECISE, value);
+            case Tegra::Shader::F2fRoundingOp::Floor:
+                return Operation(OperationCode::FFloor, PRECISE, value);
+            case Tegra::Shader::F2fRoundingOp::Ceil:
+                return Operation(OperationCode::FCeil, PRECISE, value);
+            case Tegra::Shader::F2fRoundingOp::Trunc:
+                return Operation(OperationCode::FTrunc, PRECISE, value);
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
+                                  static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+                return value;
+            }
+        }();
+        value = GetSaturatedFloat(value, instr.alu.saturate_d);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 8abe5ba2c8a5eb839849b6554782dfd455e85699 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 01:53:05 -0300
Subject: shader_decode: Implement I2F

---
 src/video_core/shader/decode/conversion.cpp | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 465c63a9e..7c691982d 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -18,6 +18,29 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     switch (opcode->get().GetId()) {
+    case OpCode::Id::I2F_R:
+    case OpCode::Id::I2F_C: {
+        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
+        UNIMPLEMENTED_IF(instr.conversion.selector);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in I2F is not implemented");
+
+        Node value = [&]() {
+            if (instr.is_b_gpr) {
+                return GetRegister(instr.gpr20);
+            } else {
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            }
+        }();
+        const bool input_signed = instr.conversion.is_input_signed;
+        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
+        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
+        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
+        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     case OpCode::Id::F2F_R: {
         UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
         UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
-- 
cgit v1.2.3


From 3052eae25e9a35bdffdd72c2598929e6a9c72607 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 01:57:13 -0300
Subject: shader_decode: Implement F2I

---
 src/video_core/shader/decode/conversion.cpp | 37 +++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 7c691982d..82fe5e21a 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -73,6 +73,43 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::F2I_R:
+    case OpCode::Id::F2I_C: {
+        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in F2I is not implemented");
+        Node value = [&]() {
+            if (instr.is_b_gpr) {
+                return GetRegister(instr.gpr20);
+            } else {
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            }
+        }();
+        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
+
+        value = [&]() {
+            switch (instr.conversion.f2i.rounding) {
+            case Tegra::Shader::F2iRoundingOp::None:
+                return value;
+            case Tegra::Shader::F2iRoundingOp::Floor:
+                return Operation(OperationCode::FFloor, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Ceil:
+                return Operation(OperationCode::FCeil, PRECISE, value);
+            case Tegra::Shader::F2iRoundingOp::Trunc:
+                return Operation(OperationCode::FTrunc, PRECISE, value);
+            default:
+                UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
+                                  static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+                return value; // every path must return; keep the unrounded value
+            }
+        }();
+        const bool is_signed = instr.conversion.is_output_signed;
+        value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
+        value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);
+
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From e444a6553faa019b20a81a3b7380ecccfd7d3725 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 16 Dec 2018 03:57:10 -0300
Subject: shader_decode: Implement FSET

---
 src/video_core/shader/decode/float_set.cpp | 37 +++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index 17d47c17a..355fabc09 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -16,7 +16,42 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
+                                            instr.fset.neg_a != 0);
+
+    Node op_b = [&]() {
+        if (instr.is_b_imm) {
+            return GetImmediate19(instr);
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        }
+    }();
+
+    op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
+
+    // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
+    // condition is true, and to 0 otherwise.
+    const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
+    const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
+
+    const Node predicate = Operation(combiner, first_pred, second_pred);
+
+    const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
+    const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
+    const Node value =
+        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
+
+    SetRegister(bb, instr.gpr0, value);
+    // The written value is zero exactly when the predicate is false, so negate it for the flag
+    if (instr.generates_cc.Value() != 0) {
+        const Node is_zero = Operation(OperationCode::LogicalNegate, predicate);
+        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
+        LOG_WARNING(HW_GPU, "FSET condition code is incomplete");
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 501284a81a60a19713aa0509f3db994617f44659 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 16 Dec 2018 18:19:17 -0300
Subject: shader_decode: Implement BFE

---
 src/video_core/shader/decode/bfe.cpp | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index ffd904c54..6532a3bce 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -16,7 +16,31 @@ u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF(instr.bfe.negate_b);
+
+    Node op_a = GetRegister(instr.gpr8);
+    op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false);
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::BFE_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in BFE is not implemented");
+
+        const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue()));
+        const Node outer_shift_imm =
+            Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
+
+        const Node inner_shift =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
+        const Node outer_shift =
+            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
+
+        SetRegister(bb, instr.gpr0, outer_shift);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 39f1c6246a3c5140f4c2b9a2ba3cbcaecf9521dd Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:02:15 -0300
Subject: shader_decode: Implement LOP32I

---
 .../shader/decode/arithmetic_integer_immediate.cpp | 68 +++++++++++++++++++++-
 1 file changed, 67 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 46f340235..ee5754161 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -10,15 +10,81 @@
 namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
+using Tegra::Shader::LogicOperation;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::PredicateResultMode;
+using Tegra::Shader::Register;
 
 u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    Node op_a = GetRegister(instr.gpr8); // Operand A: register gpr8.
+    Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); // Operand B: the imm20_32 field.
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::LOP32I: {
+        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
+                             "Condition codes generation in LOP32I is not implemented");
+        // Apply the optional bitwise inversion of each operand before combining them.
+        if (instr.alu.lop32i.invert_a)
+            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+
+        if (instr.alu.lop32i.invert_b)
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        // LOP32I requests no predicate write: mode None with the unused predicate index.
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
+                            Tegra::Shader::PredicateResultMode::None,
+                            Tegra::Shader::Pred::UnusedIndex);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
+                          opcode->get().GetName());
+    }
 
     return pc;
 }
 
+/// Writes `op_a <logic_op> op_b` to dest and, per predicate_mode, updates a predicate.
+void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
+                                   Node op_a, Node op_b, PredicateResultMode predicate_mode,
+                                   Pred predicate) {
+    const Node result = [&]() {
+        switch (logic_op) {
+        case LogicOperation::And:
+            return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
+        case LogicOperation::Or:
+            return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
+        case LogicOperation::Xor:
+            return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
+        case LogicOperation::PassB:
+            return op_b;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
+            return Immediate(0); // Keeps the lambda well-defined if the macro above returns.
+        }
+    }();
+
+    if (dest != Register::ZeroIndex) { // Writes to the zero register are skipped.
+        SetRegister(bb, dest, result);
+    }
+
+    // Write the predicate value depending on the predicate mode.
+    switch (predicate_mode) {
+    case PredicateResultMode::None:
+        return; // No predicate is written in this mode.
+    case PredicateResultMode::NotZero: {
+        // Set the predicate to true if the result is not zero.
+        const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
+        SetPredicate(bb, static_cast<u64>(predicate), compare);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}",
+                          static_cast<u32>(predicate_mode));
+    }
+}
+
 } // namespace VideoCommon::Shader
\ No newline at end of file
-- 
cgit v1.2.3


From a2819c204f1a72a63ee5e8cc9a9830cd27fb6853 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:05:52 -0300
Subject: shader_decode: Implement SHR

---
 src/video_core/shader/decode/shift.cpp | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 41f5b8cb0..76938fa05 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -16,7 +16,32 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    const Node op_a = GetRegister(instr.gpr8); // Value to shift: register gpr8.
+    const Node op_b = [&]() { // Shift amount: immediate, register gpr20 or const buffer cbuf34.
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::SHR_C:
+    case OpCode::Id::SHR_R:
+    case OpCode::Id::SHR_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in SHR is not implemented");
+        // SignedOperation presumably picks the signed/unsigned variant from shift.is_signed.
+        const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
+                                           instr.shift.is_signed, PRECISE, op_a, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From d79c462af067f78eb6fd84b0f02c385464412017 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:05:59 -0300
Subject: shader_decode: Implement SHL

---
 src/video_core/shader/decode/shift.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 76938fa05..3ba039d21 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -39,6 +39,14 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::SHL_C:
+    case OpCode::Id::SHL_R:
+    case OpCode::Id::SHL_IMM:
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in SHL is not implemented");
+        SetRegister(bb, instr.gpr0,
+                    Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b));
+        break;
     default:
         UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From acdbbb88854b4c1dc75353018fcf2e5480cea858 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:08:52 -0300
Subject: shader_decode: Implement LD_C

---
 src/video_core/shader/decode/memory.cpp | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index d8265d3fd..6219f8ee6 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -73,6 +73,37 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::LD_C: {
+        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
+
+        Node index = GetRegister(instr.gpr8);
+
+        const Node op_a =
+            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);
+
+        switch (instr.ld_c.type.Value()) {
+        case Tegra::Shader::UniformType::Single:
+            SetRegister(bb, instr.gpr0, op_a);
+            break;
+
+        case Tegra::Shader::UniformType::Double: {
+            const Node op_b =
+                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
+
+            const Node composite =
+                Operation(OperationCode::Composite, op_a, op_b, GetRegister(RZ), GetRegister(RZ));
+
+            MetaComponents meta{{0, 1, 2, 3}};
+            bb.push_back(Operation(OperationCode::AssignComposite, meta, composite,
+                                   GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1),
+                                   GetRegister(RZ), GetRegister(RZ)));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value()));
+        }
+        break;
+    }
     case OpCode::Id::ST_A: {
         UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                              "Indirect attribute loads are not supported");
-- 
cgit v1.2.3


From 078ba28e13b4ecd7fe51e361a577a178faa74b3f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:13:00 -0300
Subject: shader_decode: Implement ISET

---
 src/video_core/shader/decode/integer_set.cpp | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 316a7d8ad..eba1c5123 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -16,7 +16,33 @@ u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    const Node op_a = GetRegister(instr.gpr8); // Operand A: register gpr8.
+    const Node op_b = [&]() { // Operand B: immediate, register gpr20 or const buffer cbuf34.
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        }
+    }();
+
+    // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
+    // is true, and to 0 otherwise.
+    const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
+    const Node first_pred =
+        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
+    // Combine the comparison with the second predicate using the operation in iset.op.
+    const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
+
+    const Node predicate = Operation(combiner, first_pred, second_pred);
+    // bf selects the floating point pair (1.0f / 0.0f); otherwise the integer pair (-1 / 0).
+    const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
+    const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
+    const Node value =
+        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
+
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From 80183de8846ccf62631d48451535f9a6a4cb8284 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 17:09:23 -0300
Subject: shader_decode: Implement BFI

---
 src/video_core/shader/decode/bfi.cpp | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index b94d46ce6..6a851b22e 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -16,7 +16,28 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF(instr.generates_cc); // Condition code generation is not implemented.
+    // Only BFI_IMM_R is decoded: base comes from gpr39, the packed shift from the immediate.
+    const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::BFI_IMM_R:
+            return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
+        default:
+            UNREACHABLE();
+        }
+    }();
+    const Node insert = GetRegister(instr.gpr8); // Read from register gpr8.
+    // packed_shift holds the offset in bits 0-7 and the bit count in bits 8-15.
+    const Node offset =
+        Operation(OperationCode::UBitwiseAnd, NO_PRECISE, packed_shift, Immediate(0xff));
+
+    Node bits =
+        Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, packed_shift, Immediate(8));
+    bits = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, bits, Immediate(0xff));
+
+    const Node value =
+        Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From faadae5814683d8d9252bdb6cafbdb07ea8619e4 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 17:30:33 -0300
Subject: shader_decode: Implement ISETP

---
 .../shader/decode/integer_set_predicate.cpp        | 31 +++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index 10975c394..d76b8018c 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -11,12 +11,41 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
 
 u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    const Node op_a = GetRegister(instr.gpr8); // Operand A: register gpr8.
+
+    const Node op_b = [&]() { // Operand B: immediate, register gpr20 or const buffer cbuf34.
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        }
+    }();
+
+    // We can't use the constant predicate as destination.
+    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+    // Fetch the (optionally negated) predicate pred39 and compare operand A against operand B.
+    const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
+    const Node predicate =
+        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
+
+    // Set the primary predicate to the result of Predicate OP SecondPredicate
+    const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
+    const Node value = Operation(combiner, predicate, second_pred);
+    SetPredicate(bb, instr.isetp.pred3, value);
+
+    if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
+        const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
+        SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From ccb71bece9e6e6c9ceabc0826624f645c5140c53 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:22:18 -0300
Subject: shader_decode: Implement IADD

---
 .../shader/decode/arithmetic_integer.cpp           | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 12c64e97a..47b27ac5b 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -16,7 +16,34 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    Node op_a = GetRegister(instr.gpr8); // Operand A: register gpr8.
+    Node op_b = [&]() { // Operand B: immediate, register gpr20 or const buffer cbuf34.
+        if (instr.is_b_imm) {
+            return Immediate(instr.alu.GetSignedImm20_20());
+        } else if (instr.is_b_gpr) {
+            return GetRegister(instr.gpr20);
+        } else {
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::IADD_C:
+    case OpCode::Id::IADD_R:
+    case OpCode::Id::IADD_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in IADD is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
+        // Apply the optional negation of each operand; no absolute value is requested.
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
+
+        SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b));
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 8486e7f8c8367a7cc225da9fbac262a116744108 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:22:44 -0300
Subject: shader_decode: Implement SEL

---
 src/video_core/shader/decode/arithmetic_integer.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 47b27ac5b..429b86813 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -41,6 +41,14 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b));
         break;
     }
+    case OpCode::Id::SEL_C:
+    case OpCode::Id::SEL_R:
+    case OpCode::Id::SEL_IMM: {
+        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
+        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 8f37531f8ef94e9a43d33232f4c2da980ce7bf80 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:22:57 -0300
Subject: shader_decode: Implement LOP

---
 src/video_core/shader/decode/arithmetic_integer.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 429b86813..d01336e0e 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -49,6 +49,21 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::LOP_C:
+    case OpCode::Id::LOP_R:
+    case OpCode::Id::LOP_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in LOP is not implemented");
+
+        if (instr.alu.lop.invert_a)
+            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+        if (instr.alu.lop.invert_b)
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+
+        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
+                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From becfdb863845d9ea81c1844c8ee3c681d03fd9ea Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 17:44:20 -0300
Subject: shader_decode: Implement PBK and BRK

---
 src/video_core/shader/decode/other.cpp | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index ef0598d4f..0416d7eaa 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -64,7 +64,19 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
         // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
         // structure to the BRA opcode.
-        bb.push_back(Operation(OperationCode::Ssy, Immediate(pc + instr.bra.GetBranchTarget())));
+        const u32 target = pc + instr.bra.GetBranchTarget();
+        bb.push_back(Operation(OperationCode::Ssy, Immediate(target)));
+        break;
+    }
+    case OpCode::Id::PBK: {
+        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
+                             "Constant buffer PBK is not supported");
+
+        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
+        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
+        // it's very unlikely a driver will emit such invalid shader.
+        const u32 target = pc + instr.bra.GetBranchTarget();
+        bb.push_back(Operation(OperationCode::Pbk, Immediate(target)));
         break;
     }
     case OpCode::Id::SYNC: {
@@ -76,6 +88,15 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         bb.push_back(Operation(OperationCode::Sync));
         break;
     }
+    case OpCode::Id::BRK: {
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
+                             static_cast<u32>(cc));
+
+        // The BRK opcode jumps to the address previously set by the PBK opcode
+        bb.push_back(Operation(OperationCode::Brk));
+        break;
+    }
     case OpCode::Id::IPA: {
         const auto& attribute = instr.attribute.fmt28;
         const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
-- 
cgit v1.2.3


From b0e79208385ca3183fd1abdd4c6628268840e9ef Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 18:09:40 -0300
Subject: shader_decode: Implement XMAD

---
 src/video_core/shader/decode/xmad.cpp | 86 ++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 27a2fc05d..fcab1fb80 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -16,7 +16,91 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF(instr.xmad.sign_a);
+    UNIMPLEMENTED_IF(instr.xmad.sign_b);
+    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                         "Condition codes generation in XMAD is not implemented");
+
+    // XMAD: multiply a 16-bit half of A by a 16-bit half of B, then add the processed C.
+    Node op_a = GetRegister(instr.gpr8); // instr.xmad.sign_a
+    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
+    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
+    const bool is_signed_a = instr.xmad.sign_a == 1;
+    const bool is_signed_b = instr.xmad.sign_b == 1;
+    const bool is_signed_c = is_signed_a;
+
+    auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::XMAD_CR:
+            return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+                    GetRegister(instr.gpr39)};
+        case OpCode::Id::XMAD_RR:
+            return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+        case OpCode::Id::XMAD_RC:
+            return {false, GetRegister(instr.gpr39),
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+        case OpCode::Id::XMAD_IMM:
+            return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
+                    GetRegister(instr.gpr39)};
+        default:
+            UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
+            return {false, Immediate(0), Immediate(0)};
+        }
+    }();
+    // Use the upper or the lower 16 bits of operand A, as selected by high_a.
+    if (instr.xmad.high_a) {
+        op_a = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_a, NO_PRECISE, op_a,
+                               Immediate(16));
+    } else {
+        op_a = SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, NO_PRECISE, op_a,
+                               Immediate(0xffff));
+    }
+    // Keep the unmodified B: the CBcc mode and the merge step below use its full value.
+    const Node original_b = op_b;
+    if (instr.xmad.high_b) {
+        op_b = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_b, NO_PRECISE, op_b,
+                               Immediate(16));
+    } else {
+        op_b = SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, NO_PRECISE, op_b,
+                               Immediate(0xffff));
+    }
+
+    // TODO(Rodrigo): Use an appropriate sign for this operation
+    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
+    if (instr.xmad.product_shift_left) {
+        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
+    }
+    // Pre-process operand C according to the XMAD mode.
+    op_c = [&]() {
+        switch (instr.xmad.mode) {
+        case Tegra::Shader::XmadMode::None:
+            return op_c;
+        case Tegra::Shader::XmadMode::CLo:
+            return SignedOperation(OperationCode::IBitwiseAnd, is_signed_c, NO_PRECISE, op_c,
+                                   Immediate(0xffff));
+        case Tegra::Shader::XmadMode::CHi:
+            return SignedOperation(OperationCode::ILogicalShiftRight, is_signed_c, NO_PRECISE, op_c,
+                                   Immediate(16));
+        case Tegra::Shader::XmadMode::CBcc: {
+            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+                                                   NO_PRECISE, original_b, Immediate(16));
+            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, op_c, shifted_b);
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
+            return Immediate(0);
+        }
+    }();
+
+    // TODO(Rodrigo): Use an appropriate sign for this operation
+    Node sum = Operation(OperationCode::IAdd, product, op_c);
+    if (is_merge) {
+        const Node a = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, sum, Immediate(0xffff));
+        const Node b =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
+        sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
+    }
+    SetRegister(bb, instr.gpr0, sum);
 
     return pc;
 }
-- 
cgit v1.2.3


From 210620ff314c774cd0da5a6b50501dec45914751 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 18:49:48 -0300
Subject: shader_decode: Implement ISCADD

---
 src/video_core/shader/decode/arithmetic_integer.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index d01336e0e..d494af736 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -41,6 +41,21 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b));
         break;
     }
+    case OpCode::Id::ISCADD_C:
+    case OpCode::Id::ISCADD_R:
+    case OpCode::Id::ISCADD_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in ISCADD is not implemented");
+
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
+
+        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
+        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
+        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     case OpCode::Id::SEL_C:
     case OpCode::Id::SEL_R:
     case OpCode::Id::SEL_IMM: {
-- 
cgit v1.2.3


From 6ca31f544a4559eca547b45b5158d210932cc428 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 19:03:53 -0300
Subject: shader_decode: Implement BRA internal flag

---
 src/video_core/shader/decode/other.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 0416d7eaa..5b3f9aa30 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -50,11 +50,15 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                              "BRA with constant buffers are not implemented");
 
-        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
-        UNIMPLEMENTED_IF(cc != Tegra::Shader::ConditionCode::T);
-
         const u32 target = pc + instr.bra.GetBranchTarget();
-        bb.push_back(Operation(OperationCode::Bra, Immediate(target)));
+        const Node branch = Operation(OperationCode::Bra, Immediate(target));
+
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        if (cc != Tegra::Shader::ConditionCode::T) {
+            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
+        } else {
+            bb.push_back(branch);
+        }
         break;
     }
     case OpCode::Id::SSY: {
-- 
cgit v1.2.3


From e8235c0215d51236c5b968de971435b7cf74dc81 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 21:14:25 -0300
Subject: shader_decode: Implement I2I

---
 src/video_core/shader/decode/conversion.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 82fe5e21a..b823b6119 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -18,6 +18,32 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     switch (opcode->get().GetId()) {
+    case OpCode::Id::I2I_R: {
+        UNIMPLEMENTED_IF(instr.conversion.selector);
+
+        const bool input_signed = instr.conversion.is_input_signed;
+        const bool output_signed = instr.conversion.is_output_signed;
+
+        Node value = GetRegister(instr.gpr20);
+        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
+
+        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
+                                        input_signed);
+        if (input_signed != output_signed) {
+            value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
+        }
+
+        SetRegister(bb, instr.gpr0, value);
+
+        if (instr.generates_cc) {
+            const Node zero_condition =
+                SignedOperation(OperationCode::LogicalIEqual, output_signed, value, Immediate(0));
+            SetInternalFlag(bb, InternalFlag::Zero, zero_condition);
+            LOG_WARNING(HW_GPU, "I2I Condition codes implementation is incomplete.");
+        }
+
+        break;
+    }
     case OpCode::Id::I2F_R:
     case OpCode::Id::I2F_C: {
         UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
-- 
cgit v1.2.3


From 07944a23455379dcc590735f67d764304c457bd7 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 21:42:59 -0300
Subject: shader_decode: Implement F2F_C

---
 src/video_core/shader/decode/conversion.cpp | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index b823b6119..ef46ab7a5 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -67,13 +67,21 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
-    case OpCode::Id::F2F_R: {
+    case OpCode::Id::F2F_R:
+    case OpCode::Id::F2F_C: {
         UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
         UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
         UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                              "Condition codes generation in F2F is not implemented");
 
-        Node value = GetRegister(instr.gpr20);
+        Node value = [&]() { // Source is gpr20 when is_b_gpr is set, else the cbuf34 constant.
+            if (instr.is_b_gpr) {
+                return GetRegister(instr.gpr20);
+            } else {
+                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+            }
+        }();
+
         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
 
         value = [&]() {
-- 
cgit v1.2.3


From 518a2bd2060a5c1e6b9acb987439e0009d74fb43 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 22:01:23 -0300
Subject: shader_decode: Implement IMNMX

---
 src/video_core/shader/decode/arithmetic_integer.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index d494af736..dbdcebbb4 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -79,6 +79,22 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
                             instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
         break;
     }
+    case OpCode::Id::IMNMX_C:
+    case OpCode::Id::IMNMX_R:
+    case OpCode::Id::IMNMX_IMM: {
+        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in IMNMX is not implemented");
+        // Both the min and the max node are built; a predicate decides which one is written.
+        const bool is_signed = instr.imnmx.is_signed;
+        // Select receives (condition, min, max); negate_pred optionally inverts the predicate.
+        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
+        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
+        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
+        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 376a8375118937c577063c5d92ad33cfdc33439b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 17 Dec 2018 22:18:46 -0300
Subject: shader_decode: Implement MOV_SYS

---
 src/video_core/shader/decode/other.cpp | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 5b3f9aa30..9200b5da9 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -46,6 +46,33 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::KIL: {
+        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
+        // Only the form with flow condition Always and condition code T is handled.
+        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
+        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
+                             static_cast<u32>(cc));
+        // Append a Kil operation node to the basic block.
+        bb.push_back(Operation(OperationCode::Kil));
+        break;
+    }
+    case OpCode::Id::MOV_SYS: {
+        switch (instr.sys20) { // Dispatch on the system variable selected by sys20.
+        case Tegra::Shader::SystemVariable::InvocationInfo: {
+            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
+            SetRegister(bb, instr.gpr0, Immediate(0u)); // Placeholder: gpr0 always receives 0.
+            break;
+        }
+        case Tegra::Shader::SystemVariable::Ydirection: {
+            // Config pack's third value is Y_NEGATE's state.
+            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
+        }
+        break;
+    }
     case OpCode::Id::BRA: {
         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                              "BRA with constant buffers are not implemented");
-- 
cgit v1.2.3


From cf4a08d95098370868fb631a0436f2a4968df9af Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 18 Dec 2018 03:16:09 -0300
Subject: shader_decode: Implement HADD2_IMM and HMUL2_IMM

---
 .../shader/decode/arithmetic_half_immediate.cpp    | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 8d8a2dad9..5c280a1a6 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -16,7 +16,34 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
+        UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0);
+    } else {
+        UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None);
+    }
+    UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0,
+                         "Half float immediate saturation not implemented");
+    // Operand A: gpr8 with the abs/negate modifiers of the immediate encoding applied.
+    Node op_a = GetRegister(instr.gpr8);
+    op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
+    // Operand B: the immediate carried by the instruction, obtained via UnpackHalfImmediate.
+    const Node op_b = UnpackHalfImmediate(instr, true);
+    // Build the HAdd/HMul node; the meta records only type_a for this immediate form.
+    Node value = [&]() {
+        MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}};
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HADD2_IMM:
+            return Operation(OperationCode::HAdd, meta, op_a, op_b);
+        case OpCode::Id::HMUL2_IMM:
+            return Operation(OperationCode::HMul, meta, op_a, op_b);
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
+    // Write the merged value back to gpr0.
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From 68c99d2597717e5717e725efcfdb2bd53146d08c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:31:46 -0300
Subject: shader_decode: Implement HADD2 and HMUL2

---
 src/video_core/shader/decode/arithmetic_half.cpp | 49 +++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 3b189b0d1..a6c6f3174 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -16,7 +16,54 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
+        opcode->get().GetId() == OpCode::Id::HADD2_R) {
+        UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
+    }
+    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0,
+                         "Half float saturation not implemented");
+
+    const bool negate_a =
+        opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
+    const bool negate_b =
+        opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
+
+    const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a);
+
+    // Operand B is the cbuf34 constant for the _C opcodes and gpr20 for the _R opcodes; the
+    // abs_b modifier and the (opcode-dependent) negate_b computed earlier are applied afterwards.
+    Node op_b = [&]() {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HADD2_C:
+        case OpCode::Id::HMUL2_C:
+            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+        case OpCode::Id::HADD2_R:
+        case OpCode::Id::HMUL2_R:
+            return GetRegister(instr.gpr20);
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+    op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b);
+    // Both half types are read through the alu_half view, matching every other field used here.
+    Node value = [&]() {
+        MetaHalfArithmetic meta{true, {instr.alu_half.type_a, instr.alu_half.type_b}};
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HADD2_C:
+        case OpCode::Id::HADD2_R:
+            return Operation(OperationCode::HAdd, meta, op_a, op_b);
+        case OpCode::Id::HMUL2_C:
+        case OpCode::Id::HMUL2_R:
+            return Operation(OperationCode::HMul, meta, op_a, op_b);
+        default:
+            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
+            return Immediate(0);
+        }
+    }();
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
+    // Write the merged value back to gpr0.
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From 21f9e9da092ddd96fe9f149660a5bf4f676c8413 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 18 Dec 2018 19:54:12 -0300
Subject: shader_decode: Implement HSETP2

---
 .../shader/decode/half_set_predicate.cpp           | 38 +++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 5fe123ea5..d7d63d50a 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -11,12 +11,48 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
 
 u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);
+    // Operand A: gpr8 with its abs/negate modifiers applied.
+    Node op_a = GetRegister(instr.gpr8);
+    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
+    // NOTE(review): op_b is built with abs_a (not a B-side flag) plus negate_b; confirm intent.
+    const Node op_b = [&]() {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HSETP2_R:
+            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
+                                        instr.hsetp2.negate_b);
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+
+    // We can't use the constant predicate as destination.
+    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
+    // Second predicate operand (optionally negated) and the operation that combines with it.
+    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
+    // Compare the two half operands; the meta records both half types and the h_and flag.
+    MetaHalfArithmetic meta = {
+        false, {instr.hsetp2.type_a, instr.hsetp2.type_b}, instr.hsetp2.h_and != 0};
+    const Node first_pred = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
+
+    // Set the primary predicate to the result of Predicate OP SecondPredicate
+    const Node value = Operation(combiner, first_pred, second_pred);
+    SetPredicate(bb, instr.hsetp2.pred3, value);
+
+    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
+        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
+        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 8d42feb09b25825dad786cf311c9e7721c0f6c7c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:33:15 -0300
Subject: shader_decode: Implement LD_L

---
 src/video_core/shader/decode/memory.cpp | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 6219f8ee6..49b9a9eab 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -104,6 +104,24 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::LD_L: {
+        UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
+                             static_cast<unsigned>(instr.ld_l.unknown.Value()));
+        // Local memory read: the index is gpr8 plus smem_imm taken as a signed 32-bit value.
+        const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
+                                     Immediate(static_cast<s32>(instr.smem_imm)));
+        const Node lmem = GetLocalMemory(index);
+        // Only the Bytes32 access type is implemented; it copies the loaded node into gpr0.
+        switch (instr.ldst_sl.type.Value()) {
+        case Tegra::Shader::StoreType::Bytes32:
+            SetRegister(bb, instr.gpr0, lmem);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
+                              static_cast<unsigned>(instr.ldst_sl.type.Value()));
+        }
+        break;
+    }
     case OpCode::Id::ST_A: {
         UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                              "Indirect attribute loads are not supported");
-- 
cgit v1.2.3


From b184ca9089a49646d074ef898c151089207ccd76 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 02:33:31 -0300
Subject: shader_decode: Implement ST_L

---
 src/video_core/shader/decode/memory.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 49b9a9eab..c70e2ff02 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -152,6 +152,23 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
         break;
     }
+    case OpCode::Id::ST_L: {
+        // UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
+        //                      static_cast<u32>(instr.st_l.unknown.Value()));
+        // Mode check above is disabled. Index is gpr8 plus smem_imm as a signed 32-bit value.
+        const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
+                                     Immediate(static_cast<s32>(instr.smem_imm)));
+        // Only the Bytes32 access type is implemented; it stores gpr0 at the computed index.
+        switch (instr.ldst_sl.type.Value()) {
+        case Tegra::Shader::StoreType::Bytes32:
+            SetLocalMemory(bb, index, GetRegister(instr.gpr0));
+            break;
+        default:
+            UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
+                              static_cast<u32>(instr.ldst_sl.type.Value()));
+        }
+        break;
+    }
     case OpCode::Id::TEX: {
         Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
         const bool is_array = instr.tex.array != 0;
-- 
cgit v1.2.3


From a40fd075164a5f86367dfa7bea4d7815148e63b7 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 18 Dec 2018 22:18:44 -0300
Subject: shader_decode: Implement LOP3

---
 .../shader/decode/arithmetic_integer.cpp           | 60 ++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index dbdcebbb4..145bbcfc8 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -11,6 +11,7 @@ namespace VideoCommon::Shader {
 
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
 
 u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
@@ -79,6 +80,24 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
                             instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
         break;
     }
+    case OpCode::Id::LOP3_C:
+    case OpCode::Id::LOP3_R:
+    case OpCode::Id::LOP3_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in LOP3 is not implemented");
+        // op_c is gpr39; LOP3_R reads its LUT via GetImmLut28, the other forms via GetImmLut48.
+        const Node op_c = GetRegister(instr.gpr39);
+        const Node lut = [&]() {
+            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
+                return Immediate(instr.alu.lop3.GetImmLut28());
+            } else {
+                return Immediate(instr.alu.lop3.GetImmLut48());
+            }
+        }();
+        // Hand the three operands and the LUT to the helper, which writes the result to gpr0.
+        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut);
+        break;
+    }
     case OpCode::Id::IMNMX_C:
     case OpCode::Id::IMNMX_R:
     case OpCode::Id::IMNMX_IMM: {
@@ -102,4 +121,45 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
     return pc;
 }
 
+void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
+                                    Node imm_lut) {
+    constexpr u32 lop_iterations = 32;
+    const Node one = Immediate(1);
+    const Node two = Immediate(2);
+    // Evaluates LOP3 bit by bit: each result bit is the imm_lut bit chosen by a 3-bit index.
+    Node value{};
+    for (u32 i = 0; i < lop_iterations; ++i) {
+        const Node shift_amount = Immediate(i);
+        // Index bit 0 is bit i of op_c.
+        const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount);
+        const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one);
+        // Index bit 1 is bit i of op_b.
+        const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount);
+        const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one);
+        const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one);
+        // Index bit 2 is bit i of op_a.
+        const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount);
+        const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one);
+        const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two);
+        // The three bits occupy disjoint positions, so they are OR-ed (AND would always give 0).
+        const Node pack_01 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_0, pack_1);
+        const Node pack_012 = Operation(OperationCode::IBitwiseOr, NO_PRECISE, pack_01, pack_2);
+        // Fetch the LUT bit addressed by the index.
+        const Node shifted_bit =
+            Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012);
+        const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one);
+        // Move the selected bit to position i of the result.
+        const Node right =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount);
+        // The first iteration seeds the accumulator; later ones OR their bit into it.
+        if (i > 0) {
+            value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right);
+        } else {
+            value = right;
+        }
+    }
+
+    SetRegister(bb, dest, value);
+}
+
 } // namespace VideoCommon::Shader
\ No newline at end of file
-- 
cgit v1.2.3


From 4fd06efeb94ff5fc5af4c5e4b9e8a4fa95d3b383 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 19 Dec 2018 00:31:58 -0300
Subject: shader_decode: Implement IADD3

---
 .../shader/decode/arithmetic_integer.cpp           | 61 ++++++++++++++++++++++
 1 file changed, 61 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 145bbcfc8..3b9b9d6d9 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -9,6 +9,7 @@
 
 namespace VideoCommon::Shader {
 
+using Tegra::Shader::IAdd3Height;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
@@ -42,6 +43,66 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b));
         break;
     }
+    case OpCode::Id::IADD3_C:
+    case OpCode::Id::IADD3_R:
+    case OpCode::Id::IADD3_IMM: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in IADD3 is not implemented");
+        // Three-operand integer add; the third operand is always gpr39.
+        Node op_c = GetRegister(instr.gpr39);
+        // Yields the whole value, its low 16 bits, or its high 16 bits shifted down.
+        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
+            switch (height) {
+            case IAdd3Height::None:
+                return value;
+            case IAdd3Height::LowerHalfWord:
+                return Operation(OperationCode::IBitwiseAnd, NO_PRECISE, value, Immediate(0xffff));
+            case IAdd3Height::UpperHalfWord:
+                return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, value,
+                                 Immediate(16));
+            default:
+                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
+                return Immediate(0);
+            }
+        };
+        // Half-word selection is applied only for the register form.
+        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
+            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
+            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
+            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
+        }
+        // Apply the per-operand negate flags (no abs modifier, signed flag set).
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
+        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
+        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
+        // Non-register forms add (a + b) + c; IADD3_R may shift a + b by 16 first, per mode.
+        const Node value = [&]() {
+            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
+                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
+            }
+            const Node shifted = [&]() {
+                switch (instr.iadd3.mode) {
+                case Tegra::Shader::IAdd3Mode::RightShift:
+                    // TODO(tech4me): According to
+                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
+                    // The addition between op_a and op_b should be done in uint33, more
+                    // investigation required
+                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
+                                     Immediate(16));
+                case Tegra::Shader::IAdd3Mode::LeftShift:
+                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
+                                     Immediate(16));
+                default:
+                    return add_ab;
+                }
+            }();
+            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
+        }();
+        // Write the sum to gpr0.
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     case OpCode::Id::ISCADD_C:
     case OpCode::Id::ISCADD_R:
     case OpCode::Id::ISCADD_IMM: {
-- 
cgit v1.2.3


From c9cf899d1852da73e90ead3d5c0eeee58de6152d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 19 Dec 2018 00:43:23 -0300
Subject: shader_decode: Implement LEA

---
 .../shader/decode/arithmetic_integer.cpp           | 55 ++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 3b9b9d6d9..b12dc5ba8 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -12,6 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::IAdd3Height;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
 using Tegra::Shader::Register;
 
 u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
@@ -175,6 +176,60 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::LEA_R2:
+    case OpCode::Id::LEA_R1:
+    case OpCode::Id::LEA_IMM:
+    case OpCode::Id::LEA_RZ:
+    case OpCode::Id::LEA_HI: {
+        const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> {
+            switch (opcode->get().GetId()) {
+            case OpCode::Id::LEA_R2: {
+                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
+                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
+            }
+
+            case OpCode::Id::LEA_R1: {
+                const bool neg = instr.lea.r1.neg != 0;
+                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        GetRegister(instr.gpr20),
+                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
+            }
+
+            case OpCode::Id::LEA_IMM: {
+                const bool neg = instr.lea.imm.neg != 0;
+                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
+                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
+            }
+
+            case OpCode::Id::LEA_RZ: {
+                const bool neg = instr.lea.rz.neg != 0;
+                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
+                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
+                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
+            }
+
+            case OpCode::Id::LEA_HI:
+            default:
+                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
+                // Falls back to the LEA_IMM operand layout without negation (presumably a stub).
+                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
+                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
+            }
+        }();
+        // A used pred48 destination is not handled and is flagged as unimplemented.
+        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
+                             "Unhandled LEA Predicate");
+        // The result is op_a + op_b * (1 << op_c).
+        const Node shifted_c =
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c);
+        const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c);
+        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc);
+
+        SetRegister(bb, instr.gpr0, value);
+
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
     }
-- 
cgit v1.2.3


From 946c86f0bb9fd7a2e1331d27c059bcc6e7cb3c99 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 03:12:16 -0300
Subject: shader_decode: Fixup clang-format

---
 src/video_core/shader/decode/arithmetic_half.cpp | 3 +--
 src/video_core/shader/decode/xmad.cpp            | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index a6c6f3174..9547eae5d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -20,8 +20,7 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) {
         opcode->get().GetId() == OpCode::Id::HADD2_R) {
         UNIMPLEMENTED_IF(instr.alu_half.ftz != 0);
     }
-    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0,
-                         "Half float saturation not implemented");
+    UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented");
 
     const bool negate_a =
         opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index fcab1fb80..596f0ddc8 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
     UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                          "Condition codes generation in XMAD is not implemented");
 
-    Node op_a = GetRegister(instr.gpr8); // instr.xmad.sign_a
+    Node op_a = GetRegister(instr.gpr8);
 
     // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
     UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
-- 
cgit v1.2.3


From 59b34b1d76371bc1bf70ca263a1ac63293a8409e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 03:18:54 -0300
Subject: shader_ir: Fixup file inclusions and clang-format

---
 src/video_core/shader/decode/other.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 9200b5da9..9630ef831 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -9,9 +9,9 @@
 
 namespace VideoCommon::Shader {
 
+using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
-using Tegra::Shader::ConditionCode;
 
 u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
-- 
cgit v1.2.3


From 148a6418ede720681f464eca928c7c445f37db79 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 17:25:49 -0300
Subject: shader_decode: Implement FFMA

---
 src/video_core/shader/decode/ffma.cpp | 37 ++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 2044113f0..0adc85476 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -16,7 +16,42 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
+    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented",
+                         instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
+    UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
+                         instr.ffma.tab5980_1.Value());
+    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                         "Condition codes generation in FFMA is not implemented");
+    // Operand A is always gpr8; operands B and C depend on the encoding form.
+    const Node op_a = GetRegister(instr.gpr8);
+    // FFMA_RC is the only form taking op_c from the constant buffer (op_b is then gpr39).
+    auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::FFMA_CR: {
+            return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+                    GetRegister(instr.gpr39)};
+        }
+        case OpCode::Id::FFMA_RR:
+            return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
+        case OpCode::Id::FFMA_RC: {
+            return {GetRegister(instr.gpr39),
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+        }
+        case OpCode::Id::FFMA_IMM:
+            return {GetImmediate19(instr), GetRegister(instr.gpr39)};
+        default:
+            UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
+        }
+    }();
+
+    op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
+    op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
+    // Build the FFma node, then pass it through GetSaturatedFloat with the saturate_d flag.
+    Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
+    value = GetSaturatedFloat(value, instr.alu.saturate_d);
+    // Write the result to gpr0.
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From fc46ecddb3bca4861babbf610cd64ab9fdc1bb08 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 18:47:22 -0300
Subject: video_core: Return safe values after an assert hits

---
 src/video_core/shader/decode/arithmetic.cpp                   | 1 +
 src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 1 +
 src/video_core/shader/decode/bfi.cpp                          | 1 +
 src/video_core/shader/decode/conversion.cpp                   | 8 ++++----
 src/video_core/shader/decode/ffma.cpp                         | 1 +
 src/video_core/shader/decode/xmad.cpp                         | 8 ++++----
 6 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 9f8c27b3e..ef846bd9a 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -115,6 +115,7 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
             default:
                 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
                                   static_cast<unsigned>(instr.sub_op.Value()));
+                return Immediate(0);
             }
         }();
         value = GetSaturatedFloat(value, instr.alu.saturate_d);
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index ee5754161..57d9f54f7 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -62,6 +62,7 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation
             return op_b;
         default:
             UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op));
+            return Immediate(0);
         }
     }();
 
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index 6a851b22e..a750aca30 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -24,6 +24,7 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
             return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())};
         default:
             UNREACHABLE();
+            return {Immediate(0), Immediate(0)};
         }
     }();
     const Node insert = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index ef46ab7a5..791f03fe0 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -96,11 +96,10 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
                 return Operation(OperationCode::FCeil, PRECISE, value);
             case Tegra::Shader::F2fRoundingOp::Trunc:
                 return Operation(OperationCode::FTrunc, PRECISE, value);
-            default:
-                UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
-                                  static_cast<u32>(instr.conversion.f2f.rounding.Value()));
-                break;
             }
+            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
+                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+            return Immediate(0);
         }();
         value = GetSaturatedFloat(value, instr.alu.saturate_d);
 
@@ -135,6 +134,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
             default:
                 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
                                   static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+                return Immediate(0);
             }
         }();
         const bool is_signed = instr.conversion.is_output_signed;
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 0adc85476..a17ebd6db 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -42,6 +42,7 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
             return {GetImmediate19(instr), GetRegister(instr.gpr39)};
         default:
             UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
+            return {Immediate(0), Immediate(0)};
         }
     }();
 
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 596f0ddc8..0466069ae 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -42,9 +42,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
         case OpCode::Id::XMAD_IMM:
             return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                     GetRegister(instr.gpr39)};
-        default:
-            UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
         }
+        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
+        return {false, Immediate(0), Immediate(0)};
     }();
 
     if (instr.xmad.high_a) {
@@ -85,9 +85,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
                                                    NO_PRECISE, original_b, Immediate(16));
             return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, op_c, shifted_b);
         }
-        default: {
+        default:
             UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
-        }
+            return Immediate(0);
         }
     }();
 
-- 
cgit v1.2.3


From af5c6e4ccb0cedc764fff0a62b46cd969c21b006 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 19:11:18 -0300
Subject: shader_decode: Implement IADD32I

---
 src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 57d9f54f7..a158d345a 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -24,6 +24,17 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
     Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
 
     switch (opcode->get().GetId()) {
+    case OpCode::Id::IADD32I: {
+        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
+                             "Condition codes generation in IADD32I is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
+
+        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
+
+        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     case OpCode::Id::LOP32I: {
         UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
                              "Condition codes generation in LOP32I is not implemented");
-- 
cgit v1.2.3


From 2d9136cec60e8feaa4af258b977962b887d675df Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 21 Dec 2018 19:31:55 -0300
Subject: shader_decode: Fixup FSET

---
 src/video_core/shader/decode/float_set.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index 355fabc09..b69d94c2e 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -47,8 +47,8 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) {
 
     SetRegister(bb, instr.gpr0, value);
 
-    if (instr.generates_cc.Value() != 0) {
-        const Node is_zero = Operation(OperationCode::LogicalFEqual, predicate, Immediate(0.0f));
+    if (instr.generates_cc) {
+        const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
         SetInternalFlag(bb, InternalFlag::Zero, is_zero);
         LOG_WARNING(HW_GPU, "FSET condition code is incomplete");
     }
-- 
cgit v1.2.3


From 03e088a4f44af1212da0c7c23f95293a6e129a35 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 22 Dec 2018 01:20:57 -0300
Subject: shader_ir: Fixup TEX and TEXS and partially fix TLD4 decompiling

---
 src/video_core/shader/decode/memory.cpp | 99 ++++++++++++++++-----------------
 1 file changed, 49 insertions(+), 50 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index c70e2ff02..500a32af5 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -183,28 +183,24 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
         const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array);
 
-        if (depth_compare) {
-            SetRegister(bb, instr.gpr0, texture);
-        } else {
-            MetaComponents meta;
-            std::array<Node, 4> dest;
-
-            std::size_t dest_elem = 0;
-            for (std::size_t elem = 0; elem < 4; ++elem) {
-                if (!instr.tex.IsComponentEnabled(elem)) {
-                    // Skip disabled components
-                    continue;
-                }
-                meta.components_map[dest_elem] = static_cast<u32>(elem);
-                dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
-
-                ++dest_elem;
+        MetaComponents meta;
+        std::array<Node, 4> dest;
+
+        std::size_t dest_elem = 0;
+        for (std::size_t elem = 0; elem < 4; ++elem) {
+            if (!instr.tex.IsComponentEnabled(elem)) {
+                // Skip disabled components
+                continue;
             }
-            std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+            meta.components_map[dest_elem] = static_cast<u32>(elem);
+            dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
 
-            bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture,
-                                   dest[0], dest[1], dest[2], dest[3]));
+            ++dest_elem;
         }
+        std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+
+        bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0],
+                               dest[1], dest[2], dest[3]));
         break;
     }
     case OpCode::Id::TEXS: {
@@ -272,7 +268,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         params.push_back(Immediate(static_cast<u32>(instr.tld4.component)));
 
         const auto& sampler = GetSampler(instr.sampler, texture_type, false, depth_compare);
-        const MetaTexture meta{sampler, num_coordinates};
+        MetaTexture meta{sampler, num_coordinates};
 
         const Node texture =
             Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
@@ -331,7 +327,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
         const auto& sampler =
             GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
-        const MetaTexture meta{sampler, num_coords};
+        MetaTexture meta{sampler, num_coords};
 
         WriteTexsInstructionFloat(
             bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params)));
@@ -350,7 +346,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
         switch (instr.txq.query_type) {
         case Tegra::Shader::TextureQueryType::Dimension: {
-            const MetaTexture meta_texture{sampler};
+            MetaTexture meta_texture{sampler};
             const MetaComponents meta_components{{0, 1, 2, 3}};
 
             const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture,
@@ -402,7 +398,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
             texture_type = TextureType::Texture2D;
         }
 
-        const MetaTexture meta_texture{sampler, static_cast<u32>(coords.size())};
+        MetaTexture meta_texture{sampler, static_cast<u32>(coords.size())};
         const Node texture =
             Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords));
 
@@ -474,7 +470,8 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruct
 
 Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
-                              std::size_t bias_offset, std::vector<Node>&& coords) {
+                              std::size_t array_offset, std::size_t bias_offset,
+                              std::vector<Node>&& coords) {
     UNIMPLEMENTED_IF_MSG(
         (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
             (texture_type == TextureType::TextureCube && is_array && depth_compare),
@@ -486,26 +483,26 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                             process_mode == TextureProcessMode::LL ||
                             process_mode == TextureProcessMode::LLA;
 
+    // LOD selection (either via bias or explicit textureLod) not supported in GL for
+    // sampler2DArrayShadow and samplerCubeArrayShadow.
     const bool gl_lod_supported =
-        !((texture_type == TextureType::Texture2D && is_array && depth_compare) ||
-          (texture_type == TextureType::TextureCube && !is_array && depth_compare));
+        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
+          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
 
     const OperationCode read_method =
         lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;
 
-    const MetaTexture meta{sampler, static_cast<u32>(coords.size())};
+    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
 
+    std::optional<u32> array_offset_value;
+    if (is_array)
+        array_offset_value = static_cast<u32>(array_offset);
+    MetaTexture meta{sampler, static_cast<u32>(coords.size()), array_offset_value};
     std::vector<Node> params = std::move(coords);
 
-    if (process_mode != TextureProcessMode::None) {
+    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
         if (process_mode == TextureProcessMode::LZ) {
-            if (gl_lod_supported) {
-                params.push_back(Immediate(0));
-            } else {
-                // Lod 0 is emulated by a big negative bias in scenarios that are not supported by
-                // GLSL
-                params.push_back(Immediate(-1000));
-            }
+            params.push_back(Immediate(0.0f));
         } else {
             // If present, lod or bias are always stored in the register indexed by the gpr20 field
             // with an offset depending on the usage of the other registers
@@ -518,8 +515,8 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
 
 Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
-    const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
-                                   process_mode != Tegra::Shader::TextureProcessMode::LZ);
+    const bool lod_bias_enabled =
+        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
 
     const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
         texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -536,29 +533,30 @@ Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
     if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
         coords.push_back(Immediate(0.0f));
     }
+    std::size_t array_offset{};
+    if (is_array) {
+        array_offset = coords.size();
+        coords.push_back(GetRegister(array_register));
+    }
     if (depth_compare) {
         // Depth is always stored in the register signaled by gpr20
         // or in the next register if lod or bias are used
         const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
         coords.push_back(GetRegister(depth_register));
     }
-    if (is_array) {
-        coords.push_back(GetRegister(array_register));
-    }
     // Fill ignored coordinates
     while (coords.size() < total_coord_count) {
         coords.push_back(Immediate(0));
     }
 
-    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0,
-                          std::move(coords));
+    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
+                          0, std::move(coords));
 }
 
 Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
-
-    const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
-                                   process_mode != Tegra::Shader::TextureProcessMode::LZ);
+    const bool lod_bias_enabled =
+        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
 
     const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
         texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
@@ -577,22 +575,23 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
         coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
     }
 
+    std::size_t array_offset{};
+    if (is_array) {
+        array_offset = coords.size();
+        coords.push_back(GetRegister(array_register));
+    }
     if (depth_compare) {
         // Depth is always stored in the register signaled by gpr20
         // or in the next register if lod or bias are used
         const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
         coords.push_back(GetRegister(depth_register));
     }
-    if (is_array) {
-        coords.push_back(
-            Operation(OperationCode::ICastFloat, NO_PRECISE, GetRegister(array_register)));
-    }
     // Fill ignored coordinates
     while (coords.size() < total_coord_count) {
         coords.push_back(Immediate(0));
     }
 
-    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array,
+    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                           (coord_count > 2 ? 1 : 0), std::move(coords));
 }
 
-- 
cgit v1.2.3


From ec98e4d842d5ba04b329c866f5c9b1e7314069f2 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 00:38:01 -0300
Subject: shader_decode: Update TLD4 reflecting #1862 changes

---
 src/video_core/shader/decode/memory.cpp | 101 ++++++++++++++++----------------
 1 file changed, 49 insertions(+), 52 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 500a32af5..cfdb92807 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -225,7 +225,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::TLD4: {
-        ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
         ASSERT(instr.tld4.array == 0);
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
@@ -238,63 +237,29 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
         }
 
+        const auto texture_type = instr.tld4.texture_type.Value();
         const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
-        auto texture_type = instr.tld4.texture_type.Value();
-        u32 num_coordinates = static_cast<u32>(GetCoordCount(texture_type));
-        if (depth_compare)
-            num_coordinates += 1;
+        const bool is_array = instr.tld4.array != 0;
+        const Node texture = GetTld4Code(instr, texture_type, depth_compare, is_array);
 
-        std::vector<Node> params;
-
-        switch (num_coordinates) {
-        case 2: {
-            params.push_back(GetRegister(instr.gpr8));
-            params.push_back(GetRegister(instr.gpr8.Value() + 1));
-            break;
-        }
-        case 3: {
-            params.push_back(GetRegister(instr.gpr8));
-            params.push_back(GetRegister(instr.gpr8.Value() + 1));
-            params.push_back(GetRegister(instr.gpr8.Value() + 2));
-            break;
-        }
-        default:
-            UNIMPLEMENTED_MSG("Unhandled coordinates number {}", static_cast<u32>(num_coordinates));
-            params.push_back(GetRegister(instr.gpr8));
-            params.push_back(GetRegister(instr.gpr8.Value() + 1));
-            num_coordinates = 2;
-            texture_type = Tegra::Shader::TextureType::Texture2D;
-        }
-        params.push_back(Immediate(static_cast<u32>(instr.tld4.component)));
-
-        const auto& sampler = GetSampler(instr.sampler, texture_type, false, depth_compare);
-        MetaTexture meta{sampler, num_coordinates};
-
-        const Node texture =
-            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
-
-        if (depth_compare) {
-            SetRegister(bb, instr.gpr0, texture);
-        } else {
-            MetaComponents meta;
-            std::array<Node, 4> dest;
-
-            std::size_t dest_elem = 0;
-            for (std::size_t elem = 0; elem < 4; ++elem) {
-                if (!instr.tex.IsComponentEnabled(elem)) {
-                    // Skip disabled components
-                    continue;
-                }
-                meta.components_map[dest_elem] = static_cast<u32>(elem);
-                dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
+        MetaComponents meta_components;
+        std::array<Node, 4> dest;
 
-                ++dest_elem;
+        std::size_t dest_elem = 0;
+        for (std::size_t elem = 0; elem < 4; ++elem) {
+            if (!instr.tex.IsComponentEnabled(elem)) {
+                // Skip disabled components
+                continue;
             }
-            std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+            meta_components.components_map[dest_elem] = static_cast<u32>(elem);
+            dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
 
-            bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture,
-                                   dest[0], dest[1], dest[2], dest[3]));
+            ++dest_elem;
         }
+        std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+
+        bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture,
+                               dest[0], dest[1], dest[2], dest[3]));
         break;
     }
     case OpCode::Id::TLD4S: {
@@ -595,6 +560,38 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                           (coord_count > 2 ? 1 : 0), std::move(coords));
 }
 
+Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
+                           bool is_array) {
+    const std::size_t coord_count = GetCoordCount(texture_type);
+    const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
+    const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); // unused below
+
+    // When the texture is an array, the array index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // The first coordinate is in gpr8, or in gpr8 + 1 when an array index precedes it
+    const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+    std::vector<Node> params; // NOTE(review): no depth operand is added when depth_compare is set
+
+    for (size_t i = 0; i < coord_count; ++i) {
+        params.push_back(GetRegister(coord_register + i));
+    }
+    std::size_t array_offset{};
+    if (is_array) {
+        array_offset = params.size();
+        params.push_back(GetRegister(array_register));
+    }
+
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+    std::optional<u32> array_offset_value;
+    if (is_array)
+        array_offset_value = static_cast<u32>(array_offset);
+    MetaTexture meta{sampler, static_cast<u32>(params.size()), array_offset_value};
+
+    return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+}
+
 std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
     TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
     std::size_t max_coords, std::size_t max_inputs) {
-- 
cgit v1.2.3


From 55e6786254d33e3501002bf7fbdd52552a0df32a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 01:18:33 -0300
Subject: shader_decode: Implement TLDS (untested)

---
 src/video_core/shader/decode/memory.cpp | 69 +++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 8 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index cfdb92807..ce3445512 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -204,7 +204,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::TEXS: {
-        Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
+        const TextureType texture_type{instr.texs.GetTextureType()};
         const bool is_array{instr.texs.IsArrayTexture()};
         const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
         const auto process_mode = instr.texs.GetTextureProcessMode();
@@ -373,6 +373,22 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
                                GetRegister(RZ), GetRegister(RZ)));
         break;
     }
+    case OpCode::Id::TLDS: { // Lowered to an F4TexelFetch node by GetTldsCode
+        const TextureType texture_type{instr.tlds.GetTextureType()};
+        const bool is_array{instr.tlds.IsArrayTexture()};
+
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
+                             "AOFFI is not implemented");
+        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
+
+        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { // warn only; decoding continues
+            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
+        }
+
+        const Node texture = GetTldsCode(instr, texture_type, is_array);
+        WriteTexsInstructionFloat(bb, instr, texture);
+        break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
     }
@@ -576,22 +592,59 @@ Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool dep
     for (size_t i = 0; i < coord_count; ++i) {
         params.push_back(GetRegister(coord_register + i));
     }
-    std::size_t array_offset{};
+    std::optional<u32> array_offset;
     if (is_array) {
-        array_offset = params.size();
+        array_offset = static_cast<u32>(params.size());
         params.push_back(GetRegister(array_register));
     }
 
     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-
-    std::optional<u32> array_offset_value;
-    if (is_array)
-        array_offset_value = static_cast<u32>(array_offset);
-    MetaTexture meta{sampler, static_cast<u32>(params.size()), array_offset_value};
+    MetaTexture meta{sampler, static_cast<u32>(params.size()), array_offset};
 
     return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
 }
 
+Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
+    const std::size_t type_coord_count = GetCoordCount(texture_type);
+    const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
+    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+
+    // When the texture is an array, the array index is always stored in the gpr8 field
+    const u64 array_register = instr.gpr8.Value();
+    // For array textures the coordinates start at gpr20 instead of gpr8
+    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
+
+    const u64 last_coord_register =
+        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
+            ? static_cast<u64>(instr.gpr20.Value()) // last coordinate lives in gpr20
+            : coord_register + 1; // otherwise it directly follows the first coordinate
+
+    std::vector<Node> params;
+
+    for (std::size_t i = 0; i < type_coord_count; ++i) {
+        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
+        params.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+    }
+    std::optional<u32> array_offset;
+    if (is_array) {
+        array_offset = static_cast<u32>(params.size());
+        params.push_back(GetRegister(array_register));
+    }
+    const auto coords_count = static_cast<u32>(params.size()); // counted before the lod is appended
+
+    if (lod_enabled) {
+        // When an explicit lod is used, it is always read from gpr20
+        params.push_back(GetRegister(instr.gpr20));
+    } else {
+        params.push_back(Immediate(0)); // no explicit lod: the lod operand is 0
+    }
+
+    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+    MetaTexture meta{sampler, coords_count, array_offset};
+
+    return Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
+}
+
 std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
     TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
     std::size_t max_coords, std::size_t max_inputs) {
-- 
cgit v1.2.3


From 027f443e699652fc30a849efaf8c12725a7b5729 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 01:33:47 -0300
Subject: shader_decode: Implement POPC

---
 src/video_core/shader/decode/arithmetic_integer.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index b12dc5ba8..271ce205b 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -119,6 +119,16 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::POPC_C:
+    case OpCode::Id::POPC_R:
+    case OpCode::Id::POPC_IMM: {
+        if (instr.popc.invert) {
+            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+        }
+        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
+        SetRegister(bb, instr.gpr0, value);
+        break;
+    }
     case OpCode::Id::SEL_C:
     case OpCode::Id::SEL_R:
     case OpCode::Id::SEL_IMM: {
-- 
cgit v1.2.3


From dd91650aaf217196a2b1ced17df24bd74349843d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 02:26:35 -0300
Subject: shader_decode: Implement HFMA2

---
 src/video_core/shader/decode/hfma2.cpp | 54 +++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 5ce08481e..bf7491804 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <tuple>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -9,6 +11,8 @@
 
 namespace VideoCommon::Shader {
 
+using Tegra::Shader::HalfPrecision;
+using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
@@ -16,7 +20,55 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
+        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
+    } else {
+        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
+    }
+
+    constexpr auto identity = HalfType::H0_H1;
+
+    const HalfType type_a = instr.hfma2.type_a;
+    const Node op_a = GetRegister(instr.gpr8);
+
+    bool neg_b{}, neg_c{};
+    auto [saturate, type_b, op_b, type_c,
+          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HFMA2_CR:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_b,
+                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
+                    GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_RC:
+            neg_b = instr.hfma2.negate_b;
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
+                    instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+        case OpCode::Id::HFMA2_RR:
+            neg_b = instr.hfma2.rr.negate_b;
+            neg_c = instr.hfma2.rr.negate_c;
+            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
+                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
+        case OpCode::Id::HFMA2_IMM_R:
+            neg_c = instr.hfma2.negate_c;
+            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
+                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
+        default:
+            return {false, identity, Immediate(0), identity, Immediate(0)};
+        }
+    }();
+    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");
+
+    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
+    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);
+
+    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
+    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
+    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
+
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From 7e13e8bfcb4d3bb3c9d7eafb81e790e244cdfdd7 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 17:07:49 -0300
Subject: shader_decode: Implement PSET

---
 src/video_core/shader/decode/predicate_set_register.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index 67a06b5b4..04ddd9f9e 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -16,7 +16,22 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                         "Condition codes generation in PSET is not implemented");
+
+    const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+    const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+    const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
+
+    const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
+    const Node result = Operation(combiner, first_pred, second_pred);
+
+    const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
+    const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
+    const Node value = Operation(OperationCode::Select, PRECISE, true_value, false_value);
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From 3f1136ac6f39e3d0e0f2c250905a79c9b47aa28c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 17:14:43 -0300
Subject: shader_decode: Implement CSETP

---
 .../shader/decode/predicate_set_predicate.cpp      | 51 ++++++++++++++++------
 1 file changed, 37 insertions(+), 14 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
index 24352170d..6ea6daceb 100644
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -17,25 +17,48 @@ u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
-    const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::PSETP: {
+        const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+        const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
 
-    // We can't use the constant predicate as destination.
-    ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+        // We can't use the constant predicate as destination.
+        ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-    const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+        const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
 
-    const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
-    const Node predicate = Operation(combiner, op_a, op_b);
+        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
+        const Node predicate = Operation(combiner, op_a, op_b);
 
-    // Set the primary predicate to the result of Predicate OP SecondPredicate
-    SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
+        // Set the primary predicate to the result of Predicate OP SecondPredicate
+        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
 
-    if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
-        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
-        SetPredicate(
-            bb, instr.psetp.pred0,
-            Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), second_pred));
+        if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+            // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
+            // enabled
+            SetPredicate(bb, instr.psetp.pred0,
+                         Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
+                                   second_pred));
+        }
+        break;
+    }
+    case OpCode::Id::CSETP: {
+        const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
+        const Node condition_code = GetConditionCode(instr.csetp.cc);
+
+        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
+
+        if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
+            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
+        }
+        if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+            const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
+            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
+        }
+        break;
+    }
+    default:
+        UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
     }
 
     return pc;
-- 
cgit v1.2.3


From 8332482c24136091c3fa2c95d7efdd3dd1fa9adf Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 17:24:18 -0300
Subject: shader_decode: Implement R2P

---
 .../shader/decode/register_set_predicate.cpp       | 29 +++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index 29a348cf5..796039cd9 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -16,7 +16,34 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr);
+
+    const Node apply_mask = [&]() {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::R2P_IMM:
+            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
+        default:
+            UNREACHABLE();
+            return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
+        }
+    }();
+    const Node mask =
+        Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, GetRegister(instr.gpr8),
+                  Immediate(static_cast<u32>(instr.r2p.byte)));
+
+    constexpr u32 programmable_preds = 7;
+    for (u64 pred = 0; pred < programmable_preds; ++pred) {
+        const Node shift = Immediate(1u << static_cast<u32>(pred));
+
+        const Node apply_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift);
+        const Node condition = Operation(OperationCode::LogicalUEqual, apply_compare, Immediate(0));
+
+        const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift);
+        const Node value = Operation(OperationCode::LogicalUEqual, value_compare, Immediate(0));
+
+        const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
+        bb.push_back(Conditional(condition, {code}));
+    }
 
     return pc;
 }
-- 
cgit v1.2.3


From 2df55985b691d659073dce2d857d46bc152b4842 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 23 Dec 2018 20:59:49 -0300
Subject: shader_decode: Rework HSETP2

---
 src/video_core/shader/decode/half_set_predicate.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d7d63d50a..72cc3d5c8 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -39,10 +39,12 @@ u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) {
     const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
 
     const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
+    const OperationCode pair_combiner =
+        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
 
-    MetaHalfArithmetic meta = {
-        false, {instr.hsetp2.type_a, instr.hsetp2.type_b}, instr.hsetp2.h_and != 0};
-    const Node first_pred = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
+    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
+    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
+    const Node first_pred = Operation(pair_combiner, comparison);
 
     // Set the primary predicate to the result of Predicate OP SecondPredicate
     const Node value = Operation(combiner, first_pred, second_pred);
-- 
cgit v1.2.3


From b11e0b94c7ce0d965a6149c98c48cda967ec3c04 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 24 Dec 2018 00:51:52 -0300
Subject: shader_decode: Implement HSET2

---
 src/video_core/shader/decode/half_set.cpp | 44 ++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index af363d5d2..b4ac06144 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -16,7 +18,47 @@ u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED();
+    UNIMPLEMENTED_IF(instr.hset2.ftz != 0);
+
+    // instr.hset2.type_a
+    // instr.hset2.type_b
+    Node op_a = GetRegister(instr.gpr8);
+    Node op_b = [&]() {
+        switch (opcode->get().GetId()) {
+        case OpCode::Id::HSET2_R:
+            return GetRegister(instr.gpr20);
+        default:
+            UNREACHABLE();
+            return Immediate(0);
+        }
+    }();
+
+    op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
+    op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
+
+    const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+    MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}};
+    const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b);
+
+    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
+
+    // HSET2 operates on each half float in the pack.
+    std::array<Node, 2> values;
+    for (u32 i = 0; i < 2; ++i) {
+        const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
+        const Node true_value = Immediate(raw_value << (i * 16));
+        const Node false_value = Immediate(0);
+
+        const Node comparison =
+            Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+        const Node predicate = Operation(combiner, comparison, second_pred);
+
+        values[i] = Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+    }
+
+    const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
-- 
cgit v1.2.3


From a1b845b6514e135a5810b12c20261ec646216c28 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 24 Dec 2018 01:23:00 -0300
Subject: shader_decode: Implement VMAD and VSETP

---
 src/video_core/shader/decode/video.cpp | 120 +++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 src/video_core/shader/decode/video.cpp

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
new file mode 100644
index 000000000..9510896e4
--- /dev/null
+++ b/src/video_core/shader/decode/video.cpp
@@ -0,0 +1,120 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::VideoType;
+using Tegra::Shader::VmadShr;
+
+u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
+    const Instruction instr = {program_code[pc]};
+    const auto opcode = OpCode::Decode(instr);
+    // Decodes VMAD and VSETP. Operand A is read from gpr8, operand B from gpr20 or an immediate.
+    const Node op_a =
+        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
+                        instr.video.type_a, instr.video.byte_height_a);
+    const Node op_b = [&]() {
+        if (instr.video.use_register_b) {
+            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
+                                   instr.video.signed_b, instr.video.type_b,
+                                   instr.video.byte_height_b);
+        }
+        if (instr.video.signed_b) {
+            const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
+            return Immediate(static_cast<u32>(imm)); // s16 -> u32 sign-extends the immediate
+        } else {
+            return Immediate(instr.alu.GetImm20_16());
+        }
+    }();
+
+    switch (opcode->get().GetId()) {
+    case OpCode::Id::VMAD: {
+        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
+                             "Condition codes generation in VMAD is not implemented");
+        // The multiply-add is treated as signed if either source operand is signed.
+        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
+        const Node op_c = GetRegister(instr.gpr39);
+        // value = op_a * op_b + op_c
+        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
+        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);
+        // Optionally shift the result right by 7 or 15 bits, as selected by vmad.shr.
+        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
+            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
+            value =
+                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
+        }
+
+        SetRegister(bb, instr.gpr0, value);
+
+        break;
+    }
+    case OpCode::Id::VSETP: {
+        // We can't use the constant predicate as destination.
+        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+        // The comparison is treated as signed if either source operand is signed.
+        const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
+        const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
+        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
+
+        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
+
+        // Set the primary predicate (pred3) to first_pred OP second_pred
+        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
+
+        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+            // Set the secondary predicate (pred0) to !first_pred OP second_pred,
+            // if enabled (i.e. pred0 is not the unused predicate index)
+            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
+            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
+        }
+        break;
+    }
+    default: // Any other opcode routed to this decoder is reported as unimplemented
+        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
+    }
+
+    return pc;
+}
+
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
+                               Tegra::Shader::VideoType type, u64 byte_height) {
+    if (!is_chunk) { // Byte select: (op >> (byte_height * 8)) & 0xff
+        const auto offset = static_cast<u32>(byte_height * 8);
+        const Node shift = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE,
+                                           op, Immediate(offset));
+        return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, shift,
+                               Immediate(0xff));
+    }
+    const Node zero = Immediate(0); // Returned on the unsupported/invalid paths below
+    // Otherwise type selects which 16-bit half of op is used; other types are not supported.
+    switch (type) {
+    case Tegra::Shader::VideoType::Size16_Low: // Low half: op & 0xffff
+        return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, op,
+                               Immediate(0xffff));
+    case Tegra::Shader::VideoType::Size16_High: // High half: op >> 16
+        return SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, op,
+                               Immediate(16));
+    case Tegra::Shader::VideoType::Size32:
+        // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
+        // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
+        UNIMPLEMENTED();
+        return zero;
+    case Tegra::Shader::VideoType::Invalid:
+        UNREACHABLE_MSG("Invalid instruction encoding");
+        return zero;
+    default:
+        UNREACHABLE();
+        return zero;
+    }
+}
+
+} // namespace VideoCommon::Shader
\ No newline at end of file
-- 
cgit v1.2.3


From e1fea1e0c594cc7c5a404e7006a4b4b2f29200ae Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 24 Dec 2018 02:24:38 -0300
Subject: video_core: Implement IR based geometry shaders

---
 src/video_core/shader/decode/other.cpp | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 9630ef831..1918762b8 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -12,6 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
+using Tegra::Shader::Register;
 
 u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
@@ -140,6 +141,30 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, value);
         break;
     }
+    case OpCode::Id::OUT_R: {
+        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
+                             "Stream buffer is not supported");
+
+        if (instr.out.emit) {
+            // gpr0 is used to store the next address and gpr8 contains the address to emit.
+            // Hardware uses pointers here but we just ignore it
+            bb.push_back(Operation(OperationCode::EmitVertex));
+            SetRegister(bb, instr.gpr0, Immediate(0));
+        }
+        if (instr.out.cut) {
+            bb.push_back(Operation(OperationCode::EndPrimitive));
+        }
+        break;
+    }
+    case OpCode::Id::ISBERD: {
+        UNIMPLEMENTED_IF(instr.isberd.o != 0);
+        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
+        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
+        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
+        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
+        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
+        break;
+    }
     case OpCode::Id::DEPBAR: {
         LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
         break;
-- 
cgit v1.2.3


From a2e22b435947fd5fb835572c02369af83ceeafce Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 24 Dec 2018 02:26:40 -0300
Subject: shader_decode: Fixup clang-format

---
 src/video_core/shader/decode/half_set.cpp               | 3 ++-
 src/video_core/shader/decode/register_set_predicate.cpp | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index b4ac06144..e34deeff4 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -54,7 +54,8 @@ u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) {
             Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
         const Node predicate = Operation(combiner, comparison, second_pred);
 
-        values[i] = Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+        values[i] =
+            Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
     }
 
     const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index 796039cd9..bbfe2ce05 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -35,7 +35,8 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) {
     for (u64 pred = 0; pred < programmable_preds; ++pred) {
         const Node shift = Immediate(1u << static_cast<u32>(pred));
 
-        const Node apply_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift);
+        const Node apply_compare =
+            Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift);
         const Node condition = Operation(OperationCode::LogicalUEqual, apply_compare, Immediate(0));
 
         const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift);
-- 
cgit v1.2.3


From 55a10d02e571532bba7a2a7af605a4cda2743d6d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 24 Dec 2018 02:36:47 -0300
Subject: shader_decode: Fixup PSET

---
 src/video_core/shader/decode/predicate_set_register.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index 04ddd9f9e..6c58496c2 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -26,11 +26,12 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) {
     const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
 
     const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
-    const Node result = Operation(combiner, first_pred, second_pred);
+    const Node predicate = Operation(combiner, first_pred, second_pred);
 
     const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
     const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
-    const Node value = Operation(OperationCode::Select, PRECISE, true_value, false_value);
+    const Node value =
+        Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
     SetRegister(bb, instr.gpr0, value);
 
     return pc;
-- 
cgit v1.2.3


From ea78c78253c6183938da6fc87bc763ed93957499 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 24 Dec 2018 18:13:50 -0300
Subject: shader_decode: Fixup WriteLogicOperation zero comparison

---
 src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index a158d345a..3b8a60c6b 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -89,7 +89,7 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation
         return;
     case PredicateResultMode::NotZero: {
         // Set the predicate to true if the result is not zero.
-        const Node compare = Operation(OperationCode::LogicalIEqual, result, Immediate(0));
+        const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
         SetPredicate(bb, static_cast<u64>(predicate), compare);
         break;
     }
-- 
cgit v1.2.3


From c68c13e1aaef63674474861fd7be528a49b72206 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 25 Dec 2018 03:46:14 -0300
Subject: shader_decode: Fixup R2P

---
 src/video_core/shader/decode/register_set_predicate.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index bbfe2ce05..06a3c7539 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -37,10 +37,11 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) {
 
         const Node apply_compare =
             Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift);
-        const Node condition = Operation(OperationCode::LogicalUEqual, apply_compare, Immediate(0));
+        const Node condition =
+            Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
 
         const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift);
-        const Node value = Operation(OperationCode::LogicalUEqual, value_compare, Immediate(0));
+        const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
 
         const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
         bb.push_back(Conditional(condition, {code}));
-- 
cgit v1.2.3


From 5af82a8ed4e2e0b7abc9c7da9f7bb5fa1c83de29 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 26 Dec 2018 01:33:56 -0300
Subject: shader_decode: Implement TEXS.F16

---
 src/video_core/shader/decode/memory.cpp | 38 ++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 13 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ce3445512..679e7f01b 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -219,8 +219,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         if (instr.texs.fp32_flag) {
             WriteTexsInstructionFloat(bb, instr, texture);
         } else {
-            UNIMPLEMENTED();
-            // WriteTexsInstructionHalfFloat(bb, instr, texture);
+            WriteTexsInstructionHalfFloat(bb, instr, texture);
         }
         break;
     }
@@ -416,39 +415,52 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
     return *used_samplers.emplace(entry).first;
 }
 
-void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
-                                         Node texture) {
+void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) {
     // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
     // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 
     MetaComponents meta;
     std::array<Node, 4> dest;
-
-    std::size_t written_components = 0;
     for (u32 component = 0; component < 4; ++component) {
         if (!instr.texs.IsComponentEnabled(component)) {
             continue;
         }
-        meta.components_map[written_components] = static_cast<u32>(component);
+        meta.components_map[meta.count] = component;
 
-        if (written_components < 2) {
+        if (meta.count < 2) {
             // Write the first two swizzle components to gpr0 and gpr0+1
-            dest[written_components] = GetRegister(instr.gpr0.Value() + written_components % 2);
+            dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2);
         } else {
             ASSERT(instr.texs.HasTwoDestinations());
             // Write the rest of the swizzle components to gpr28 and gpr28+1
-            dest[written_components] = GetRegister(instr.gpr28.Value() + written_components % 2);
+            dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2);
         }
-
-        ++written_components;
+        ++meta.count;
     }
 
-    std::generate(dest.begin() + written_components, dest.end(), [&]() { return GetRegister(RZ); });
+    std::generate(dest.begin() + meta.count, dest.end(), [&]() { return GetRegister(RZ); });
 
     bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2],
                            dest[3]));
 }
 
+void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) {
+    // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
+    // float instruction).
+
+    MetaComponents meta;
+    for (u32 component = 0; component < 4; ++component) {
+        if (!instr.texs.IsComponentEnabled(component))
+            continue;
+        meta.components_map[meta.count++] = component;
+    }
+    if (meta.count == 0)
+        return;
+
+    bb.push_back(Operation(OperationCode::AssignCompositeHalf, meta, texture,
+                           GetRegister(instr.gpr0), GetRegister(instr.gpr28)));
+}
+
 Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
-- 
cgit v1.2.3


From d9118d324a7f40ad9227e15408be528273743bee Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 26 Dec 2018 01:49:32 -0300
Subject: shader_ir: Remove RZ and use Register::ZeroIndex instead

---
 src/video_core/shader/decode/memory.cpp | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 679e7f01b..60bdd9b73 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -91,12 +91,14 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
                 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
 
             const Node composite =
-                Operation(OperationCode::Composite, op_a, op_b, GetRegister(RZ), GetRegister(RZ));
+                Operation(OperationCode::Composite, op_a, op_b, GetRegister(Register::ZeroIndex),
+                          GetRegister(Register::ZeroIndex));
 
             MetaComponents meta{{0, 1, 2, 3}};
             bb.push_back(Operation(OperationCode::AssignComposite, meta, composite,
                                    GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1),
-                                   GetRegister(RZ), GetRegister(RZ)));
+                                   GetRegister(Register::ZeroIndex),
+                                   GetRegister(Register::ZeroIndex)));
             break;
         }
         default:
@@ -197,7 +199,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
             ++dest_elem;
         }
-        std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+        std::generate(dest.begin() + dest_elem, dest.end(),
+                      [&]() { return GetRegister(Register::ZeroIndex); });
 
         bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0],
                                dest[1], dest[2], dest[3]));
@@ -255,7 +258,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
             ++dest_elem;
         }
-        std::generate(dest.begin() + dest_elem, dest.end(), [&]() { return GetRegister(RZ); });
+        std::generate(dest.begin() + dest_elem, dest.end(),
+                      [&]() { return GetRegister(Register::ZeroIndex); });
 
         bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture,
                                dest[0], dest[1], dest[2], dest[3]));
@@ -369,7 +373,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         const MetaComponents meta_composite{{0, 1, 2, 3}};
         bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture,
                                GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1),
-                               GetRegister(RZ), GetRegister(RZ)));
+                               GetRegister(Register::ZeroIndex), GetRegister(Register::ZeroIndex)));
         break;
     }
     case OpCode::Id::TLDS: {
@@ -438,7 +442,8 @@ void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node
         ++meta.count;
     }
 
-    std::generate(dest.begin() + meta.count, dest.end(), [&]() { return GetRegister(RZ); });
+    std::generate(dest.begin() + meta.count, dest.end(),
+                  [&]() { return GetRegister(Register::ZeroIndex); });
 
     bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2],
                            dest[3]));
-- 
cgit v1.2.3


From 52223313b10af4c76b516d6ead247a1a201a71d8 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 26 Dec 2018 02:17:56 -0300
Subject: shader_ir: Remove Ipa primitive

---
 src/video_core/shader/decode/other.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 1918762b8..386433d8e 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -134,9 +134,8 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                 instr.ipa.sample_mode.Value()};
 
-        const Node input_attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
-        const Node ipa = Operation(OperationCode::Ipa, input_attr);
-        const Node value = GetSaturatedFloat(ipa, instr.ipa.saturate);
+        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
+        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
 
         SetRegister(bb, instr.gpr0, value);
         break;
-- 
cgit v1.2.3


From 2faad9bf23dbcedc80dca7ed9ad4b81c0416dd5e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 26 Dec 2018 02:58:47 -0300
Subject: shader_decode: Use BitfieldExtract instead of shift + and

---
 .../shader/decode/arithmetic_integer.cpp           |  5 ++---
 src/video_core/shader/decode/bfi.cpp               |  9 ++------
 .../shader/decode/register_set_predicate.cpp       | 12 +++++-----
 src/video_core/shader/decode/video.cpp             | 12 +++-------
 src/video_core/shader/decode/xmad.cpp              | 26 +++++-----------------
 5 files changed, 18 insertions(+), 46 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 271ce205b..931e0fa1d 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -57,10 +57,9 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
             case IAdd3Height::None:
                 return value;
             case IAdd3Height::LowerHalfWord:
-                return Operation(OperationCode::IBitwiseAnd, NO_PRECISE, value, Immediate(0xffff));
+                return BitfieldExtract(value, 0, 16);
             case IAdd3Height::UpperHalfWord:
-                return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, value,
-                                 Immediate(16));
+                return BitfieldExtract(value, 16, 16);
             default:
                 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height));
                 return Immediate(0);
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index a750aca30..b0d8d9eba 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -28,13 +28,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
         }
     }();
     const Node insert = GetRegister(instr.gpr8);
-
-    const Node offset =
-        Operation(OperationCode::UBitwiseAnd, NO_PRECISE, packed_shift, Immediate(0xff));
-
-    Node bits =
-        Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, packed_shift, Immediate(8));
-    bits = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, bits, Immediate(0xff));
+    const Node offset = BitfieldExtract(packed_shift, 0, 8);
+    const Node bits = BitfieldExtract(packed_shift, 8, 8);
 
     const Node value =
         Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index 06a3c7539..14bce9fa4 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -27,20 +27,18 @@ u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) {
             return Immediate(static_cast<u32>(instr.r2p.immediate_mask));
         }
     }();
-    const Node mask =
-        Operation(OperationCode::ULogicalShiftRight, NO_PRECISE, GetRegister(instr.gpr8),
-                  Immediate(static_cast<u32>(instr.r2p.byte)));
+    const Node mask = GetRegister(instr.gpr8);
+    const auto offset = static_cast<u32>(instr.r2p.byte) * 8;
 
     constexpr u32 programmable_preds = 7;
     for (u64 pred = 0; pred < programmable_preds; ++pred) {
-        const Node shift = Immediate(1u << static_cast<u32>(pred));
+        const auto shift = static_cast<u32>(pred);
 
-        const Node apply_compare =
-            Operation(OperationCode::UBitwiseAnd, NO_PRECISE, apply_mask, shift);
+        const Node apply_compare = BitfieldExtract(apply_mask, shift, 1);
         const Node condition =
             Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0));
 
-        const Node value_compare = Operation(OperationCode::UBitwiseAnd, NO_PRECISE, mask, shift);
+        const Node value_compare = BitfieldExtract(mask, offset + shift, 1);
         const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0));
 
         const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value);
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 9510896e4..b491fbadb 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -88,21 +88,15 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
 Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
                                Tegra::Shader::VideoType type, u64 byte_height) {
     if (!is_chunk) {
-        const auto offset = static_cast<u32>(byte_height * 8);
-        const Node shift = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE,
-                                           op, Immediate(offset));
-        return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, shift,
-                               Immediate(0xff));
+        return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
     }
     const Node zero = Immediate(0);
 
     switch (type) {
     case Tegra::Shader::VideoType::Size16_Low:
-        return SignedOperation(OperationCode::IBitwiseAnd, is_signed, NO_PRECISE, op,
-                               Immediate(0xffff));
+        return BitfieldExtract(op, 0, 16);
     case Tegra::Shader::VideoType::Size16_High:
-        return SignedOperation(OperationCode::ILogicalShiftRight, is_signed, NO_PRECISE, op,
-                               Immediate(16));
+        return BitfieldExtract(op, 16, 16);
     case Tegra::Shader::VideoType::Size32:
         // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
         // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 0466069ae..3e37aee4a 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -47,22 +47,10 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
         return {false, Immediate(0), Immediate(0)};
     }();
 
-    if (instr.xmad.high_a) {
-        op_a = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_a, NO_PRECISE, op_a,
-                               Immediate(16));
-    } else {
-        op_a = SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, NO_PRECISE, op_a,
-                               Immediate(0xffff));
-    }
+    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);
 
     const Node original_b = op_b;
-    if (instr.xmad.high_b) {
-        op_b = SignedOperation(OperationCode::ILogicalShiftRight, is_signed_b, NO_PRECISE, op_a,
-                               Immediate(16));
-    } else {
-        op_b = SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, NO_PRECISE, op_b,
-                               Immediate(0xffff));
-    }
+    op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);
 
     // TODO(Rodrigo): Use an appropiate sign for this operation
     Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
@@ -75,11 +63,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
         case Tegra::Shader::XmadMode::None:
             return op_c;
         case Tegra::Shader::XmadMode::CLo:
-            return SignedOperation(OperationCode::IBitwiseAnd, is_signed_c, NO_PRECISE, op_c,
-                                   Immediate(0xffff));
+            return BitfieldExtract(op_c, 0, 16);
         case Tegra::Shader::XmadMode::CHi:
-            return SignedOperation(OperationCode::ILogicalShiftRight, is_signed_c, NO_PRECISE, op_c,
-                                   Immediate(16));
+            return BitfieldExtract(op_c, 16, 16);
         case Tegra::Shader::XmadMode::CBcc: {
             const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                                    NO_PRECISE, original_b, Immediate(16));
@@ -94,9 +80,9 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
     // TODO(Rodrigo): Use an appropiate sign for this operation
     Node sum = Operation(OperationCode::IAdd, product, op_c);
     if (is_merge) {
-        const Node a = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, sum, Immediate(0xffff));
+        const Node a = BitfieldExtract(sum, 0, 16);
         const Node b =
-            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(0xffff));
+            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
         sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
     }
 
-- 
cgit v1.2.3


From 50195b1704bcdf22d379d31b143172a32ebdfaec Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 26 Dec 2018 03:18:11 -0300
Subject: shader_decode: Use proper primitive names

---
 src/video_core/shader/decode/memory.cpp |  4 ++--
 src/video_core/shader/decode/other.cpp  | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 60bdd9b73..f3f78a662 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -155,8 +155,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::ST_L: {
-        // UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
-        //                      static_cast<u32>(instr.st_l.unknown.Value()));
+        UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
+                             static_cast<u32>(instr.st_l.unknown.Value()));
 
         const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
                                      Immediate(static_cast<s32>(instr.smem_imm)));
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 386433d8e..6e6795ba7 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -54,7 +54,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
                              static_cast<u32>(cc));
 
-        bb.push_back(Operation(OperationCode::Kil));
+        bb.push_back(Operation(OperationCode::Discard));
         break;
     }
     case OpCode::Id::MOV_SYS: {
@@ -79,7 +79,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
                              "BRA with constant buffers are not implemented");
 
         const u32 target = pc + instr.bra.GetBranchTarget();
-        const Node branch = Operation(OperationCode::Bra, Immediate(target));
+        const Node branch = Operation(OperationCode::Branch, Immediate(target));
 
         const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
         if (cc != Tegra::Shader::ConditionCode::T) {
@@ -97,7 +97,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
         // structure to the BRA opcode.
         const u32 target = pc + instr.bra.GetBranchTarget();
-        bb.push_back(Operation(OperationCode::Ssy, Immediate(target)));
+        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
         break;
     }
     case OpCode::Id::PBK: {
@@ -108,7 +108,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
         // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
         // it's very unlikely a driver will emit such invalid shader.
         const u32 target = pc + instr.bra.GetBranchTarget();
-        bb.push_back(Operation(OperationCode::Pbk, Immediate(target)));
+        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
         break;
     }
     case OpCode::Id::SYNC: {
@@ -117,7 +117,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
                              static_cast<u32>(cc));
 
         // The SYNC opcode jumps to the address previously set by the SSY opcode
-        bb.push_back(Operation(OperationCode::Sync));
+        bb.push_back(Operation(OperationCode::PopFlowStack));
         break;
     }
     case OpCode::Id::BRK: {
@@ -126,7 +126,7 @@ u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
                              static_cast<u32>(cc));
 
         // The BRK opcode jumps to the address previously set by the PBK opcode
-        bb.push_back(Operation(OperationCode::Brk));
+        bb.push_back(Operation(OperationCode::PopFlowStack));
         break;
     }
     case OpCode::Id::IPA: {
-- 
cgit v1.2.3


From d911740e5d474ae459f9e05d82a7dba9c7e06340 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 27 Dec 2018 01:50:22 -0300
Subject: shader_ir: Remove composite primitives and use temporals instead

---
 src/video_core/shader/decode/memory.cpp | 294 ++++++++++++++++----------------
 1 file changed, 149 insertions(+), 145 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index f3f78a662..5ae3f344d 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -90,15 +90,10 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
             const Node op_b =
                 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
 
-            const Node composite =
-                Operation(OperationCode::Composite, op_a, op_b, GetRegister(Register::ZeroIndex),
-                          GetRegister(Register::ZeroIndex));
-
-            MetaComponents meta{{0, 1, 2, 3}};
-            bb.push_back(Operation(OperationCode::AssignComposite, meta, composite,
-                                   GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1),
-                                   GetRegister(Register::ZeroIndex),
-                                   GetRegister(Register::ZeroIndex)));
+            SetTemporal(bb, 0, op_a);
+            SetTemporal(bb, 1, op_b);
+            SetRegister(bb, instr.gpr0, GetTemporal(0));
+            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
             break;
         }
         default:
@@ -172,10 +167,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::TEX: {
-        Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
-        const bool is_array = instr.tex.array != 0;
-        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
-        const auto process_mode = instr.tex.GetTextureProcessMode();
         UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
 
@@ -183,27 +174,12 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
         }
 
-        const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array);
-
-        MetaComponents meta;
-        std::array<Node, 4> dest;
-
-        std::size_t dest_elem = 0;
-        for (std::size_t elem = 0; elem < 4; ++elem) {
-            if (!instr.tex.IsComponentEnabled(elem)) {
-                // Skip disabled components
-                continue;
-            }
-            meta.components_map[dest_elem] = static_cast<u32>(elem);
-            dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
-
-            ++dest_elem;
-        }
-        std::generate(dest.begin() + dest_elem, dest.end(),
-                      [&]() { return GetRegister(Register::ZeroIndex); });
-
-        bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0],
-                               dest[1], dest[2], dest[3]));
+        const TextureType texture_type{instr.tex.texture_type};
+        const bool is_array = instr.tex.array != 0;
+        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
+        const auto process_mode = instr.tex.GetTextureProcessMode();
+        WriteTexInstructionFloat(
+            bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
         break;
     }
     case OpCode::Id::TEXS: {
@@ -216,13 +192,13 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
         }
 
-        const Node texture =
+        const Node4 components =
             GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
 
         if (instr.texs.fp32_flag) {
-            WriteTexsInstructionFloat(bb, instr, texture);
+            WriteTexsInstructionFloat(bb, instr, components);
         } else {
-            WriteTexsInstructionHalfFloat(bb, instr, texture);
+            WriteTexsInstructionHalfFloat(bb, instr, components);
         }
         break;
     }
@@ -242,27 +218,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         const auto texture_type = instr.tld4.texture_type.Value();
         const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
         const bool is_array = instr.tld4.array != 0;
-        const Node texture = GetTld4Code(instr, texture_type, depth_compare, is_array);
-
-        MetaComponents meta_components;
-        std::array<Node, 4> dest;
-
-        std::size_t dest_elem = 0;
-        for (std::size_t elem = 0; elem < 4; ++elem) {
-            if (!instr.tex.IsComponentEnabled(elem)) {
-                // Skip disabled components
-                continue;
-            }
-            meta_components.components_map[dest_elem] = static_cast<u32>(elem);
-            dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem);
-
-            ++dest_elem;
-        }
-        std::generate(dest.begin() + dest_elem, dest.end(),
-                      [&]() { return GetRegister(Register::ZeroIndex); });
-
-        bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture,
-                               dest[0], dest[1], dest[2], dest[3]));
+        WriteTexInstructionFloat(bb, instr,
+                                 GetTld4Code(instr, texture_type, depth_compare, is_array));
         break;
     }
     case OpCode::Id::TLD4S: {
@@ -277,28 +234,34 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
         const Node op_a = GetRegister(instr.gpr8);
         const Node op_b = GetRegister(instr.gpr20);
 
-        std::vector<Node> params;
+        std::vector<Node> coords;
 
         // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
         if (depth_compare) {
             // Note: TLD4S coordinate encoding works just like TEXS's
             const Node op_y = GetRegister(instr.gpr8.Value() + 1);
-            params.push_back(op_a);
-            params.push_back(op_y);
-            params.push_back(op_b);
+            coords.push_back(op_a);
+            coords.push_back(op_y);
+            coords.push_back(op_b);
         } else {
-            params.push_back(op_a);
-            params.push_back(op_b);
+            coords.push_back(op_a);
+            coords.push_back(op_b);
         }
-        const auto num_coords = static_cast<u32>(params.size());
-        params.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
+        const auto num_coords = static_cast<u32>(coords.size());
+        coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
 
         const auto& sampler =
             GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
-        MetaTexture meta{sampler, num_coords};
 
-        WriteTexsInstructionFloat(
-            bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params)));
+        Node4 values;
+        for (u32 element = 0; element < values.size(); ++element) {
+            auto params = coords;
+            MetaTexture meta{sampler, element, num_coords};
+            values[element] =
+                Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+        }
+
+        WriteTexsInstructionFloat(bb, instr, values);
         break;
     }
     case OpCode::Id::TXQ: {
@@ -314,18 +277,15 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
 
         switch (instr.txq.query_type) {
         case Tegra::Shader::TextureQueryType::Dimension: {
-            MetaTexture meta_texture{sampler};
-            const MetaComponents meta_components{{0, 1, 2, 3}};
-
-            const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture,
-                                           GetRegister(instr.gpr8));
-            std::array<Node, 4> dest;
-            for (std::size_t i = 0; i < dest.size(); ++i) {
-                dest[i] = GetRegister(instr.gpr0.Value() + i);
+            for (u32 element = 0; element < 4; ++element) {
+                MetaTexture meta{sampler, element};
+                const Node value = Operation(OperationCode::F4TextureQueryDimensions,
+                                             std::move(meta), GetRegister(instr.gpr8));
+                SetTemporal(bb, element, value);
+            }
+            for (u32 i = 0; i < 4; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
             }
-
-            bb.push_back(Operation(OperationCode::AssignComposite, meta_components, texture,
-                                   dest[0], dest[1], dest[2], dest[3]));
             break;
         }
         default:
@@ -366,14 +326,17 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
             texture_type = TextureType::Texture2D;
         }
 
-        MetaTexture meta_texture{sampler, static_cast<u32>(coords.size())};
-        const Node texture =
-            Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords));
+        for (u32 element = 0; element < 2; ++element) {
+            auto params = coords;
+            MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())};
+            const Node value =
+                Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params));
+            SetTemporal(bb, element, value);
+        }
+        for (u32 element = 0; element < 2; ++element) {
+            SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
+        }
 
-        const MetaComponents meta_composite{{0, 1, 2, 3}};
-        bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture,
-                               GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1),
-                               GetRegister(Register::ZeroIndex), GetRegister(Register::ZeroIndex)));
         break;
     }
     case OpCode::Id::TLDS: {
@@ -388,8 +351,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
         }
 
-        const Node texture = GetTldsCode(instr, texture_type, is_array);
-        WriteTexsInstructionFloat(bb, instr, texture);
+        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
         break;
     }
     default:
@@ -419,57 +381,80 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
     return *used_samplers.emplace(entry).first;
 }
 
-void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) {
+void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr,
+                                        const Node4& components) {
+    u32 dest_elem = 0;
+    for (u32 elem = 0; elem < 4; ++elem) {
+        if (!instr.tex.IsComponentEnabled(elem)) {
+            // Skip disabled components
+            continue;
+        }
+        SetTemporal(bb, dest_elem++, components[elem]);
+    }
+    // After writing values in temporals, move them to the real registers
+    for (u32 i = 0; i < dest_elem; ++i) {
+        SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
+    }
+}
+
+void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr,
+                                         const Node4& components) {
     // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
     // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 
-    MetaComponents meta;
-    std::array<Node, 4> dest;
+    u32 dest_elem = 0;
     for (u32 component = 0; component < 4; ++component) {
-        if (!instr.texs.IsComponentEnabled(component)) {
+        if (!instr.texs.IsComponentEnabled(component))
             continue;
-        }
-        meta.components_map[meta.count] = component;
+        SetTemporal(bb, dest_elem++, components[component]);
+    }
 
-        if (meta.count < 2) {
+    for (u32 i = 0; i < dest_elem; ++i) {
+        if (i < 2) {
             // Write the first two swizzle components to gpr0 and gpr0+1
-            dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2);
+            SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
         } else {
             ASSERT(instr.texs.HasTwoDestinations());
             // Write the rest of the swizzle components to gpr28 and gpr28+1
-            dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2);
+            SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
         }
-        ++meta.count;
     }
-
-    std::generate(dest.begin() + meta.count, dest.end(),
-                  [&]() { return GetRegister(Register::ZeroIndex); });
-
-    bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2],
-                           dest[3]));
 }
 
-void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) {
+void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr,
+                                             const Node4& components) {
     // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
     // float instruction).
 
-    MetaComponents meta;
+    Node4 values;
+    u32 dest_elem = 0;
     for (u32 component = 0; component < 4; ++component) {
         if (!instr.texs.IsComponentEnabled(component))
             continue;
-        meta.components_map[meta.count++] = component;
+        values[dest_elem++] = components[component];
     }
-    if (meta.count == 0)
+    if (dest_elem == 0)
         return;
 
-    bb.push_back(Operation(OperationCode::AssignCompositeHalf, meta, texture,
-                           GetRegister(instr.gpr0), GetRegister(instr.gpr28)));
+    std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
+
+    const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
+    if (dest_elem <= 2) {
+        SetRegister(bb, instr.gpr0, first_value);
+        return;
+    }
+
+    SetTemporal(bb, 0, first_value);
+    SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
+
+    SetRegister(bb, instr.gpr0, GetTemporal(0));
+    SetRegister(bb, instr.gpr28, GetTemporal(1));
 }
 
-Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
-                              TextureProcessMode process_mode, bool depth_compare, bool is_array,
-                              std::size_t array_offset, std::size_t bias_offset,
-                              std::vector<Node>&& coords) {
+Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
+                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
+                               std::size_t array_offset, std::size_t bias_offset,
+                               std::vector<Node>&& coords) {
     UNIMPLEMENTED_IF_MSG(
         (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
             (texture_type == TextureType::TextureCube && is_array && depth_compare),
@@ -495,24 +480,31 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
     std::optional<u32> array_offset_value;
     if (is_array)
         array_offset_value = static_cast<u32>(array_offset);
-    MetaTexture meta{sampler, static_cast<u32>(coords.size()), array_offset_value};
-    std::vector<Node> params = std::move(coords);
+
+    const auto coords_count = static_cast<u32>(coords.size());
 
     if (process_mode != TextureProcessMode::None && gl_lod_supported) {
         if (process_mode == TextureProcessMode::LZ) {
-            params.push_back(Immediate(0.0f));
+            coords.push_back(Immediate(0.0f));
         } else {
-            // If present, lod or bias are always stored in the register indexed by the gpr20 field
-            // with an offset depending on the usage of the other registers
-            params.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
+            // If present, lod or bias are always stored in the register indexed by the gpr20
+            // field with an offset depending on the usage of the other registers
+            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
         }
     }
 
-    return Operation(read_method, meta, std::move(params));
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto params = coords;
+        MetaTexture meta{sampler, element, coords_count, array_offset_value};
+        values[element] = Operation(read_method, std::move(meta), std::move(params));
+    }
+
+    return values;
 }
 
-Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
-                          TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
+                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
     const bool lod_bias_enabled =
         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
 
@@ -551,8 +543,8 @@ Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           0, std::move(coords));
 }
 
-Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
-                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
+Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
+                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
     const bool lod_bias_enabled =
         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
 
@@ -593,8 +585,8 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                           (coord_count > 2 ? 1 : 0), std::move(coords));
 }
 
-Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                           bool is_array) {
+Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
+                            bool is_array) {
     const std::size_t coord_count = GetCoordCount(texture_type);
     const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
     const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -604,24 +596,31 @@ Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool dep
     // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
     const u64 coord_register = array_register + (is_array ? 1 : 0);
 
-    std::vector<Node> params;
+    std::vector<Node> coords;
 
     for (size_t i = 0; i < coord_count; ++i) {
-        params.push_back(GetRegister(coord_register + i));
+        coords.push_back(GetRegister(coord_register + i));
     }
     std::optional<u32> array_offset;
     if (is_array) {
-        array_offset = static_cast<u32>(params.size());
-        params.push_back(GetRegister(array_register));
+        array_offset = static_cast<u32>(coords.size());
+        coords.push_back(GetRegister(array_register));
     }
 
     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-    MetaTexture meta{sampler, static_cast<u32>(params.size()), array_offset};
 
-    return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto params = coords;
+        MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset};
+        values[element] =
+            Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params));
+    }
+
+    return values;
 }
 
-Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
+Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
     const std::size_t type_coord_count = GetCoordCount(texture_type);
     const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0);
     const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
@@ -636,36 +635,41 @@ Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_
             ? static_cast<u64>(instr.gpr20.Value())
             : coord_register + 1;
 
-    std::vector<Node> params;
+    std::vector<Node> coords;
 
     for (std::size_t i = 0; i < type_coord_count; ++i) {
         const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
-        params.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
     }
     std::optional<u32> array_offset;
     if (is_array) {
-        array_offset = static_cast<u32>(params.size());
-        params.push_back(GetRegister(array_register));
+        array_offset = static_cast<u32>(coords.size());
+        coords.push_back(GetRegister(array_register));
     }
-    const auto coords_count = static_cast<u32>(params.size());
+    const auto coords_count = static_cast<u32>(coords.size());
 
     if (lod_enabled) {
         // When lod is used always is in grp20
-        params.push_back(GetRegister(instr.gpr20));
+        coords.push_back(GetRegister(instr.gpr20));
     } else {
-        params.push_back(Immediate(0));
+        coords.push_back(Immediate(0));
     }
 
     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
-    MetaTexture meta{sampler, coords_count, array_offset};
 
-    return Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
+    Node4 values;
+    for (u32 element = 0; element < values.size(); ++element) {
+        auto params = coords;
+        MetaTexture meta{sampler, element, coords_count, array_offset};
+        values[element] =
+            Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params));
+    }
+    return values;
 }
 
 std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
     TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
     std::size_t max_coords, std::size_t max_inputs) {
-
     const std::size_t coord_count = GetCoordCount(texture_type);
 
     std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
-- 
cgit v1.2.3


From 2d6c064e66bac4cb871aa26a12066441a8852008 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 27 Dec 2018 16:50:36 -0300
Subject: shader_decode: Improve zero flag implementation

---
 src/video_core/shader/decode/arithmetic.cpp        | 15 ++++-------
 .../shader/decode/arithmetic_immediate.cpp         |  6 ++---
 .../shader/decode/arithmetic_integer.cpp           | 31 ++++++++++------------
 .../shader/decode/arithmetic_integer_immediate.cpp | 18 +++++--------
 src/video_core/shader/decode/bfe.cpp               |  1 +
 src/video_core/shader/decode/bfi.cpp               |  4 +--
 src/video_core/shader/decode/conversion.cpp        | 11 +++-----
 src/video_core/shader/decode/ffma.cpp              |  3 +--
 src/video_core/shader/decode/float_set.cpp         | 11 ++++----
 .../shader/decode/predicate_set_register.cpp       |  6 +++++
 src/video_core/shader/decode/shift.cpp             | 14 +++++-----
 src/video_core/shader/decode/video.cpp             |  5 +---
 src/video_core/shader/decode/xmad.cpp              |  1 +
 13 files changed, 53 insertions(+), 73 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index ef846bd9a..926abcc8e 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -45,8 +45,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(
             instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
             instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in FMUL is not implemented");
 
         op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
 
@@ -75,21 +73,20 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
 
         value = GetSaturatedFloat(value, instr.alu.saturate_d);
 
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::FADD_C:
     case OpCode::Id::FADD_R:
     case OpCode::Id::FADD_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in FADD is not implemented");
-
         op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
         op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
 
         Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
         value = GetSaturatedFloat(value, instr.alu.saturate_d);
 
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
@@ -126,9 +123,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
     case OpCode::Id::FMNMX_C:
     case OpCode::Id::FMNMX_R:
     case OpCode::Id::FMNMX_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in FMNMX is not implemented");
-
         op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
         op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
 
@@ -136,9 +130,10 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
 
         const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
         const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
+        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
 
-        SetRegister(bb, instr.gpr0,
-                    Operation(OperationCode::Select, NO_PRECISE, condition, min, max));
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::RRO_C:
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 996b2537a..1c6da94b4 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -22,24 +22,22 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::FMUL32_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                             "Condition codes generation in FMUL32 is not implemented");
         Node value =
             Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
         value = GetSaturatedFloat(value, instr.fmul32.saturate);
 
+        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::FADD32I: {
-        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                             "Condition codes generation in FADD32I is not implemented");
         const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
                                                 instr.fadd32i.negate_a);
         const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
                                                 instr.fadd32i.negate_b);
 
         const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
+        SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 931e0fa1d..edd1695f4 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -34,22 +34,20 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
     case OpCode::Id::IADD_C:
     case OpCode::Id::IADD_R:
     case OpCode::Id::IADD_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in IADD is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
 
         op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
         op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
 
-        SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b));
+        const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+        // IADD_C/R/IMM read the common CC bit; op_32.generates_cc belongs to the 32I forms
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::IADD3_C:
     case OpCode::Id::IADD3_R:
     case OpCode::Id::IADD3_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in IADD3 is not implemented");
-
         Node op_c = GetRegister(instr.gpr39);
 
         const auto ApplyHeight = [&](IAdd3Height height, Node value) {
@@ -100,6 +98,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
             return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
         }();
 
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
@@ -115,6 +114,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
         const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
         const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
@@ -139,24 +140,19 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
     case OpCode::Id::LOP_C:
     case OpCode::Id::LOP_R:
     case OpCode::Id::LOP_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in LOP is not implemented");
-
         if (instr.alu.lop.invert_a)
             op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
         if (instr.alu.lop.invert_b)
             op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
 
         WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
-                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
+                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
+                            instr.generates_cc);
         break;
     }
     case OpCode::Id::LOP3_C:
     case OpCode::Id::LOP3_R:
     case OpCode::Id::LOP3_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in LOP3 is not implemented");
-
         const Node op_c = GetRegister(instr.gpr39);
         const Node lut = [&]() {
             if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
@@ -166,15 +162,13 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
             }
         }();
 
-        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut);
+        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
         break;
     }
     case OpCode::Id::IMNMX_C:
     case OpCode::Id::IMNMX_R:
     case OpCode::Id::IMNMX_IMM: {
         UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in IMNMX is not implemented");
 
         const bool is_signed = instr.imnmx.is_signed;
 
@@ -182,6 +176,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
         const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
         const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
         const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
+
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
@@ -247,7 +243,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
 }
 
 void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
-                                    Node imm_lut) {
+                                    Node imm_lut, bool sets_cc) {
     constexpr u32 lop_iterations = 32;
     const Node one = Immediate(1);
     const Node two = Immediate(2);
@@ -284,6 +280,7 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No
         }
     }
 
+    SetInternalFlagsFromInteger(bb, value, sets_cc);
     SetRegister(bb, dest, value);
 }
 
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 3b8a60c6b..3cbaeeaf5 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -25,20 +25,17 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::IADD32I: {
-        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                             "Condition codes generation in IADD32I is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
 
         op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
 
         const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+
+        SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::LOP32I: {
-        UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                             "Condition codes generation in LOP32I is not implemented");
-
         if (instr.alu.lop32i.invert_a)
             op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
 
@@ -46,8 +43,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
             op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
 
         WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
-                            Tegra::Shader::PredicateResultMode::None,
-                            Tegra::Shader::Pred::UnusedIndex);
+                            PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
         break;
     }
     default:
@@ -60,7 +56,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
 
 void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
                                    Node op_a, Node op_b, PredicateResultMode predicate_mode,
-                                   Pred predicate) {
+                                   Pred predicate, bool sets_cc) {
     const Node result = [&]() {
         switch (logic_op) {
         case LogicOperation::And:
@@ -77,11 +73,9 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation
         }
     }();
 
-    if (dest != Register::ZeroIndex) {
-        SetRegister(bb, dest, result);
-    }
+    SetInternalFlagsFromInteger(bb, result, sets_cc);
+    SetRegister(bb, dest, result);
 
-    using Tegra::Shader::PredicateResultMode;
     // Write the predicate value depending on the predicate mode.
     switch (predicate_mode) {
     case PredicateResultMode::None:
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index 6532a3bce..d3244fd40 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -35,6 +35,7 @@ u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) {
         const Node outer_shift =
             Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
 
+        SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
         SetRegister(bb, instr.gpr0, outer_shift);
         break;
     }
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index b0d8d9eba..ddb1872c6 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -16,8 +16,6 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    UNIMPLEMENTED_IF(instr.generates_cc);
-
     const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
         switch (opcode->get().GetId()) {
         case OpCode::Id::BFI_IMM_R:
@@ -33,6 +31,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
 
     const Node value =
         Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
+
+    SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
     SetRegister(bb, instr.gpr0, value);
 
     return pc;
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 791f03fe0..d5c75e8eb 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -33,15 +33,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
             value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
         }
 
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
-
-        if (instr.generates_cc) {
-            const Node zero_condition =
-                SignedOperation(OperationCode::LogicalIEqual, output_signed, value, Immediate(0));
-            SetInternalFlag(bb, InternalFlag::Zero, zero_condition);
-            LOG_WARNING(HW_GPU, "I2I Condition codes implementation is incomplete.");
-        }
-
         break;
     }
     case OpCode::Id::I2F_R:
@@ -64,6 +57,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
         value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
         value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
 
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
@@ -103,6 +97,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
         }();
         value = GetSaturatedFloat(value, instr.alu.saturate_d);
 
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index a17ebd6db..f3ab3d2e8 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -21,8 +21,6 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
                          instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
     UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
                          instr.ffma.tab5980_1.Value());
-    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                         "Condition codes generation in FFMA is not implemented");
 
     const Node op_a = GetRegister(instr.gpr8);
 
@@ -52,6 +50,7 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
     Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
     value = GetSaturatedFloat(value, instr.alu.saturate_d);
 
+    SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
     SetRegister(bb, instr.gpr0, value);
 
     return pc;
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index b69d94c2e..8e266cc4e 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -45,13 +45,12 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) {
     const Node value =
         Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
 
-    SetRegister(bb, instr.gpr0, value);
-
-    if (instr.generates_cc) {
-        const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
-        SetInternalFlag(bb, InternalFlag::Zero, is_zero);
-        LOG_WARNING(HW_GPU, "FSET condition code is incomplete");
+    if (instr.fset.bf) {
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+    } else {
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
     }
+    SetRegister(bb, instr.gpr0, value);
 
     return pc;
 }
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index 6c58496c2..58d20ceb5 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -32,6 +32,12 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) {
     const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
     const Node value =
         Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
+
+    if (instr.pset.bf) {
+        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
+    } else {
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+    }
     SetRegister(bb, instr.gpr0, value);
 
     return pc;
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 3ba039d21..e8ffdb818 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -31,22 +31,20 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) {
     case OpCode::Id::SHR_C:
     case OpCode::Id::SHR_R:
     case OpCode::Id::SHR_IMM: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in SHR is not implemented");
-
         const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
                                            instr.shift.is_signed, PRECISE, op_a, op_b);
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
         break;
     }
     case OpCode::Id::SHL_C:
     case OpCode::Id::SHL_R:
-    case OpCode::Id::SHL_IMM:
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in SHL is not implemented");
-        SetRegister(bb, instr.gpr0,
-                    Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b));
+    case OpCode::Id::SHL_IMM: {
+        const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b);
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
+        SetRegister(bb, instr.gpr0, value);
         break;
+    }
     default:
         UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
     }
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b491fbadb..609b3a257 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -38,9 +38,6 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
 
     switch (opcode->get().GetId()) {
     case OpCode::Id::VMAD: {
-        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                             "Condition codes generation in VMAD is not implemented");
-
         const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
         const Node op_c = GetRegister(instr.gpr39);
 
@@ -53,8 +50,8 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
                 SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
         }
 
+        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
         SetRegister(bb, instr.gpr0, value);
-
         break;
     }
     case OpCode::Id::VSETP: {
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 3e37aee4a..88f1be27d 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -86,6 +86,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
         sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
     }
 
+    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
     SetRegister(bb, instr.gpr0, sum);
 
     return pc;
-- 
cgit v1.2.3


From 170c8212bbb10129dfbaed8eb7ab67138c932af2 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 28 Dec 2018 20:00:36 -0300
Subject: shader_ir: Pass to decoder functions basic block's code

---
 src/video_core/shader/decode/arithmetic.cpp                   | 2 +-
 src/video_core/shader/decode/arithmetic_half.cpp              | 2 +-
 src/video_core/shader/decode/arithmetic_half_immediate.cpp    | 2 +-
 src/video_core/shader/decode/arithmetic_immediate.cpp         | 2 +-
 src/video_core/shader/decode/arithmetic_integer.cpp           | 2 +-
 src/video_core/shader/decode/arithmetic_integer_immediate.cpp | 2 +-
 src/video_core/shader/decode/bfe.cpp                          | 2 +-
 src/video_core/shader/decode/bfi.cpp                          | 2 +-
 src/video_core/shader/decode/conversion.cpp                   | 2 +-
 src/video_core/shader/decode/ffma.cpp                         | 2 +-
 src/video_core/shader/decode/float_set.cpp                    | 2 +-
 src/video_core/shader/decode/float_set_predicate.cpp          | 2 +-
 src/video_core/shader/decode/half_set.cpp                     | 2 +-
 src/video_core/shader/decode/half_set_predicate.cpp           | 2 +-
 src/video_core/shader/decode/hfma2.cpp                        | 2 +-
 src/video_core/shader/decode/integer_set.cpp                  | 2 +-
 src/video_core/shader/decode/integer_set_predicate.cpp        | 2 +-
 src/video_core/shader/decode/memory.cpp                       | 2 +-
 src/video_core/shader/decode/other.cpp                        | 2 +-
 src/video_core/shader/decode/predicate_set_predicate.cpp      | 2 +-
 src/video_core/shader/decode/predicate_set_register.cpp       | 2 +-
 src/video_core/shader/decode/register_set_predicate.cpp       | 2 +-
 src/video_core/shader/decode/shift.cpp                        | 2 +-
 src/video_core/shader/decode/video.cpp                        | 2 +-
 src/video_core/shader/decode/xmad.cpp                         | 2 +-
 25 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 926abcc8e..e7847f614 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::SubOp;
 
-u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index 9547eae5d..a237dcb92 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 5c280a1a6..7b4f7d284 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 1c6da94b4..4fd3db54e 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index edd1695f4..4a8cc1a1c 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -15,7 +15,7 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::Register;
 
-u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 3cbaeeaf5..b26a6e473 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -16,7 +16,7 @@ using Tegra::Shader::Pred;
 using Tegra::Shader::PredicateResultMode;
 using Tegra::Shader::Register;
 
-u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index d3244fd40..0734141b0 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index ddb1872c6..942d6729d 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index d5c75e8eb..ee18d3a99 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 
-u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index f3ab3d2e8..be8dc2230 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index 8e266cc4e..ba846f1bd 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 5dd085fea..e88b04d18 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 
-u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index e34deeff4..dfd7cb98f 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -14,7 +14,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 72cc3d5c8..53c44ae5a 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 
-u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index bf7491804..4a6b945f9 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -16,7 +16,7 @@ using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeHfma2(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index eba1c5123..85e67b03b 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index d76b8018c..c8b105a08 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 
-u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 5ae3f344d..ae71672d6 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -35,7 +35,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
     }
 }
 
-u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 6e6795ba7..c1e5f4efb 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -14,7 +14,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 
-u32 ShaderIR::DecodeOther(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
index 6ea6daceb..1717f0653 100644
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@@ -13,7 +13,7 @@ using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 
-u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index 58d20ceb5..8bd15fb00 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
index 14bce9fa4..bdb4424a6 100644
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index e8ffdb818..85026bb37 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 609b3a257..c3432356d 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -15,7 +15,7 @@ using Tegra::Shader::Pred;
 using Tegra::Shader::VideoType;
 using Tegra::Shader::VmadShr;
 
-u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 88f1be27d..3ceabecb5 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -12,7 +12,7 @@ namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
-u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
+u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-- 
cgit v1.2.3


From 1c9c4eefeb1d40a9c0ca29c528e71ee1e918a967 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 30 Dec 2018 01:05:14 -0300
Subject: shader_decode: Fixup XMAD

---
 src/video_core/shader/decode/xmad.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 3ceabecb5..9f2d636b8 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -55,7 +55,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     // TODO(Rodrigo): Use an appropiate sign for this operation
     Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
     if (instr.xmad.product_shift_left) {
-        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, Immediate(16));
+        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
     }
 
     op_c = [&]() {
-- 
cgit v1.2.3


From a63d7c49fc928d0ad440213a5a409a2f1f05afed Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Tue, 15 Jan 2019 21:06:05 -0300
Subject: shader_ir: Fixup clang build

---
 src/video_core/shader/decode/xmad.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/video_core/shader/decode')

diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 9f2d636b8..0cd9cd1cc 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -58,18 +58,20 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
     }
 
+    const Node original_c = op_c;
     op_c = [&]() {
         switch (instr.xmad.mode) {
         case Tegra::Shader::XmadMode::None:
-            return op_c;
+            return original_c;
         case Tegra::Shader::XmadMode::CLo:
-            return BitfieldExtract(op_c, 0, 16);
+            return BitfieldExtract(original_c, 0, 16);
         case Tegra::Shader::XmadMode::CHi:
-            return BitfieldExtract(op_c, 16, 16);
+            return BitfieldExtract(original_c, 16, 16);
         case Tegra::Shader::XmadMode::CBcc: {
             const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                                    NO_PRECISE, original_b, Immediate(16));
-            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, op_c, shifted_b);
+            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
+                                   shifted_b);
         }
         default:
             UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
-- 
cgit v1.2.3