summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_classes/codecs/codec.h1
-rw-r--r--src/video_core/engines/maxwell_3d.h7
-rw-r--r--src/video_core/engines/shader_bytecode.h184
-rw-r--r--src/video_core/gpu.cpp29
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp2
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp12
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h9
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp50
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h3
-rw-r--r--src/video_core/shader/async_shaders.cpp32
-rw-r--r--src/video_core/shader/decode/image.cpp4
14 files changed, 212 insertions, 138 deletions
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 5bbe6a332..ee5d62540 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -10,6 +10,7 @@
extern "C" {
#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#endif
#include <libavcodec/avcodec.h>
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1cbe8fe67..b0d9559d0 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -755,7 +755,11 @@ public:
u32 data_upload;
- INSERT_UNION_PADDING_WORDS(0x44);
+ INSERT_UNION_PADDING_WORDS(0x16);
+
+ u32 force_early_fragment_tests;
+
+ INSERT_UNION_PADDING_WORDS(0x2D);
struct {
union {
@@ -1572,6 +1576,7 @@ ASSERT_REG_POSITION(shadow_ram_control, 0x49);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
+ASSERT_REG_POSITION(force_early_fragment_tests, 0x84);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index a3c05d1b0..37d17efdc 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -32,31 +32,31 @@ struct Register {
constexpr Register() = default;
- constexpr Register(u64 value) : value(value) {}
+ constexpr Register(u64 value_) : value(value_) {}
- constexpr operator u64() const {
+ [[nodiscard]] constexpr operator u64() const {
return value;
}
template <typename T>
- constexpr u64 operator-(const T& oth) const {
+ [[nodiscard]] constexpr u64 operator-(const T& oth) const {
return value - oth;
}
template <typename T>
- constexpr u64 operator&(const T& oth) const {
+ [[nodiscard]] constexpr u64 operator&(const T& oth) const {
return value & oth;
}
- constexpr u64 operator&(const Register& oth) const {
+ [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
return value & oth.value;
}
- constexpr u64 operator~() const {
+ [[nodiscard]] constexpr u64 operator~() const {
return ~value;
}
- u64 GetSwizzledIndex(u64 elem) const {
+ [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
elem = (value + elem) & 3;
return (value & ~3) + elem;
}
@@ -75,7 +75,7 @@ enum class AttributeSize : u64 {
union Attribute {
Attribute() = default;
- constexpr explicit Attribute(u64 value) : value(value) {}
+ constexpr explicit Attribute(u64 value_) : value(value_) {}
enum class Index : u64 {
LayerViewportPointSize = 6,
@@ -107,7 +107,7 @@ union Attribute {
BitField<31, 1, u64> patch;
BitField<47, 3, AttributeSize> size;
- bool IsPhysical() const {
+ [[nodiscard]] bool IsPhysical() const {
return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
}
} fmt20;
@@ -124,7 +124,7 @@ union Attribute {
union Sampler {
Sampler() = default;
- constexpr explicit Sampler(u64 value) : value(value) {}
+ constexpr explicit Sampler(u64 value_) : value(value_) {}
enum class Index : u64 {
Sampler_0 = 8,
@@ -137,7 +137,7 @@ union Sampler {
union Image {
Image() = default;
- constexpr explicit Image(u64 value) : value{value} {}
+ constexpr explicit Image(u64 value_) : value{value_} {}
BitField<36, 13, u64> index;
u64 value;
@@ -505,14 +505,14 @@ struct IpaMode {
IpaInterpMode interpolation_mode;
IpaSampleMode sampling_mode;
- bool operator==(const IpaMode& a) const {
+ [[nodiscard]] bool operator==(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) ==
std::tie(a.interpolation_mode, a.sampling_mode);
}
- bool operator!=(const IpaMode& a) const {
+ [[nodiscard]] bool operator!=(const IpaMode& a) const {
return !operator==(a);
}
- bool operator<(const IpaMode& a) const {
+ [[nodiscard]] bool operator<(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) <
std::tie(a.interpolation_mode, a.sampling_mode);
}
@@ -658,10 +658,10 @@ union Instruction {
return *this;
}
- constexpr Instruction(u64 value) : value{value} {}
+ constexpr Instruction(u64 value_) : value{value_} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}
- constexpr bool Bit(u64 offset) const {
+ [[nodiscard]] constexpr bool Bit(u64 offset) const {
return ((value >> offset) & 1) != 0;
}
@@ -746,34 +746,34 @@ union Instruction {
BitField<28, 8, u64> imm_lut28;
BitField<48, 8, u64> imm_lut48;
- u32 GetImmLut28() const {
+ [[nodiscard]] u32 GetImmLut28() const {
return static_cast<u32>(imm_lut28);
}
- u32 GetImmLut48() const {
+ [[nodiscard]] u32 GetImmLut48() const {
return static_cast<u32>(imm_lut48);
}
} lop3;
- u16 GetImm20_16() const {
+ [[nodiscard]] u16 GetImm20_16() const {
return static_cast<u16>(imm20_16);
}
- u32 GetImm20_19() const {
+ [[nodiscard]] u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
imm |= negate_imm ? 0x80000000 : 0;
return imm;
}
- u32 GetImm20_32() const {
+ [[nodiscard]] u32 GetImm20_32() const {
return static_cast<u32>(imm20_32);
}
- s32 GetSignedImm20_20() const {
- u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
+ [[nodiscard]] s32 GetSignedImm20_20() const {
+ const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
// Sign extend the 20-bit value.
- u32 mask = 1U << (20 - 1);
+ const auto mask = 1U << (20 - 1);
return static_cast<s32>((immediate ^ mask) - mask);
}
} alu;
@@ -857,7 +857,7 @@ union Instruction {
BitField<56, 1, u64> second_negate;
BitField<30, 9, u64> second;
- u32 PackImmediates() const {
+ [[nodiscard]] u32 PackImmediates() const {
// Immediates are half floats shifted.
constexpr u32 imm_shift = 6;
return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
@@ -1033,7 +1033,7 @@ union Instruction {
BitField<28, 2, AtomicType> type;
BitField<30, 22, s64> offset;
- s32 GetImmediateOffset() const {
+ [[nodiscard]] s32 GetImmediateOffset() const {
return static_cast<s32>(offset << 2);
}
} atoms;
@@ -1215,7 +1215,7 @@ union Instruction {
BitField<39, 4, u64> rounding;
// H0, H1 extract for F16 missing
BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
- F2fRoundingOp GetRoundingMode() const {
+ [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
constexpr u64 rounding_mask = 0x0B;
return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
}
@@ -1239,15 +1239,15 @@ union Instruction {
BitField<54, 1, u64> aoffi_flag;
BitField<55, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(std::size_t component) const {
- return ((1ull << component) & component_mask) != 0;
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1271,15 +1271,15 @@ union Instruction {
BitField<36, 1, u64> aoffi_flag;
BitField<37, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1299,7 +1299,7 @@ union Instruction {
BitField<31, 4, u64> component_mask;
BitField<49, 1, u64> nodep_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NODEP:
return nodep_flag != 0;
@@ -1309,7 +1309,7 @@ union Instruction {
return false;
}
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
} txq;
@@ -1321,11 +1321,11 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
- bool IsComponentEnabled(std::size_t component) const {
- return ((1ull << component) & component_mask) != 0;
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return (ndv_flag != 0);
@@ -1347,7 +1347,7 @@ union Instruction {
BitField<54, 2, u64> offset_mode;
BitField<56, 2, u64> component;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@@ -1373,7 +1373,7 @@ union Instruction {
BitField<33, 2, u64> offset_mode;
BitField<37, 2, u64> component;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@@ -1399,7 +1399,7 @@ union Instruction {
BitField<52, 2, u64> component;
BitField<55, 1, u64> fp16_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1422,16 +1422,20 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;
- TextureType GetTextureType() const {
+ [[nodiscard]] TextureType GetTextureType() const {
// The TEXS instruction has a weird encoding for the texture type.
- if (texture_info == 0)
+ if (texture_info == 0) {
return TextureType::Texture1D;
- if (texture_info >= 1 && texture_info <= 9)
+ }
+ if (texture_info >= 1 && texture_info <= 9) {
return TextureType::Texture2D;
- if (texture_info >= 10 && texture_info <= 11)
+ }
+ if (texture_info >= 10 && texture_info <= 11) {
return TextureType::Texture3D;
- if (texture_info >= 12 && texture_info <= 13)
+ }
+ if (texture_info >= 12 && texture_info <= 13) {
return TextureType::TextureCube;
+ }
LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
static_cast<u32>(texture_info.Value()));
@@ -1439,7 +1443,7 @@ union Instruction {
return TextureType::Texture1D;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
switch (texture_info) {
case 0:
case 2:
@@ -1458,7 +1462,7 @@ union Instruction {
return TextureProcessMode::None;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
@@ -1470,16 +1474,16 @@ union Instruction {
return false;
}
- bool IsArrayTexture() const {
+ [[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info >= 7 && texture_info <= 9;
}
- bool HasTwoDestinations() const {
+ [[nodiscard]] bool HasTwoDestinations() const {
return gpr28.Value() != Register::ZeroIndex;
}
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
{},
{0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -1506,7 +1510,7 @@ union Instruction {
BitField<54, 1, u64> cl;
BitField<55, 1, u64> process_mode;
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
}
} tld;
@@ -1516,7 +1520,7 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;
- TextureType GetTextureType() const {
+ [[nodiscard]] TextureType GetTextureType() const {
// The TLDS instruction has a weird encoding for the texture type.
if (texture_info <= 1) {
return TextureType::Texture1D;
@@ -1535,13 +1539,14 @@ union Instruction {
return TextureType::Texture1D;
}
- TextureProcessMode GetTextureProcessMode() const {
- if (texture_info == 1 || texture_info == 5 || texture_info == 12)
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
+ if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
return TextureProcessMode::LL;
+ }
return TextureProcessMode::LZ;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return texture_info == 12 || texture_info == 4;
@@ -1555,7 +1560,7 @@ union Instruction {
return false;
}
- bool IsArrayTexture() const {
+ [[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info == 8;
}
@@ -1567,7 +1572,7 @@ union Instruction {
BitField<35, 1, u64> aoffi_flag;
BitField<49, 1, u64> nodep_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return aoffi_flag != 0;
@@ -1591,7 +1596,7 @@ union Instruction {
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
@@ -1604,7 +1609,7 @@ union Instruction {
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}
- StoreType GetStoreDataLayout() const {
+ [[nodiscard]] StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
@@ -1622,14 +1627,15 @@ union Instruction {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
- s32 GetBranchTarget() const {
+ [[nodiscard]] s32 GetBranchTarget() const {
// Sign extend the branch target offset
- u32 mask = 1U << (24 - 1);
- u32 value = static_cast<u32>(target);
+ const auto mask = 1U << (24 - 1);
+ const auto target_value = static_cast<u32>(target);
+ constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
+
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
- 1;
+ return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
}
} bra;
@@ -1637,14 +1643,15 @@ union Instruction {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
- s32 GetBranchExtend() const {
+ [[nodiscard]] s32 GetBranchExtend() const {
// Sign extend the branch target offset
- u32 mask = 1U << (24 - 1);
- u32 value = static_cast<u32>(target);
+ const auto mask = 1U << (24 - 1);
+ const auto target_value = static_cast<u32>(target);
+ constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
+
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
- 1;
+ return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
}
} brx;
@@ -1697,7 +1704,7 @@ union Instruction {
BitField<50, 1, u64> is_op_b_register;
BitField<51, 3, VmnmxOperation> operation;
- VmnmxType SourceFormatA() const {
+ [[nodiscard]] VmnmxType SourceFormatA() const {
switch (src_format_a) {
case 0b11:
return VmnmxType::Bits32;
@@ -1708,7 +1715,7 @@ union Instruction {
}
}
- VmnmxType SourceFormatB() const {
+ [[nodiscard]] VmnmxType SourceFormatB() const {
switch (src_format_b) {
case 0b11:
return VmnmxType::Bits32;
@@ -1739,7 +1746,7 @@ union Instruction {
BitField<20, 14, u64> shifted_offset;
BitField<34, 5, u64> index;
- u64 GetOffset() const {
+ [[nodiscard]] u64 GetOffset() const {
return shifted_offset * 4;
}
} cbuf34;
@@ -1748,7 +1755,7 @@ union Instruction {
BitField<20, 16, s64> offset;
BitField<36, 5, u64> index;
- s64 GetOffset() const {
+ [[nodiscard]] s64 GetOffset() const {
return offset;
}
} cbuf36;
@@ -1997,29 +2004,29 @@ public:
/// Returns whether an opcode has an execution predicate field or not (ie, whether it can be
/// conditionally executed).
- static bool IsPredicatedInstruction(Id opcode) {
+ [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
// TODO(Subv): Add the rest of unpredicated instructions.
return opcode != Id::SSY && opcode != Id::PBK;
}
class Matcher {
public:
- constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
- : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
+ constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
+ : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
- constexpr const char* GetName() const {
+ [[nodiscard]] constexpr const char* GetName() const {
return name;
}
- constexpr u16 GetMask() const {
+ [[nodiscard]] constexpr u16 GetMask() const {
return mask;
}
- constexpr Id GetId() const {
+ [[nodiscard]] constexpr Id GetId() const {
return id;
}
- constexpr Type GetType() const {
+ [[nodiscard]] constexpr Type GetType() const {
return type;
}
@@ -2028,7 +2035,7 @@ public:
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
- constexpr bool Matches(u16 instruction) const {
+ [[nodiscard]] constexpr bool Matches(u16 instruction) const {
return (instruction & mask) == expected;
}
@@ -2040,7 +2047,8 @@ public:
Type type;
};
- static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
+ using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
+ [[nodiscard]] static DecodeResult Decode(Instruction instr) {
static const auto table{GetDecodeTable()};
const auto matches_instruction = [instr](const auto& matcher) {
@@ -2062,7 +2070,7 @@ private:
* A '0' in a bitstring indicates that a zero must be present at that bit position.
* A '1' in a bitstring indicates that a one must be present at that bit position.
*/
- static constexpr auto GetMaskAndExpect(const char* const bitstring) {
+ [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
for (std::size_t i = 0; i < opcode_bitsize; i++) {
const std::size_t bit_position = opcode_bitsize - i - 1;
@@ -2084,14 +2092,14 @@ private:
public:
/// Creates a matcher that can match and parse instructions based on bitstring.
- static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
- const char* const name) {
+ [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
+ Type type, const char* const name) {
const auto [mask, expected] = GetMaskAndExpect(bitstring);
return Matcher(name, mask, expected, op, type);
}
};
- static std::vector<Matcher> GetDecodeTable() {
+ [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
std::vector<Matcher> table = {
#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ebd149c3a..e91f52938 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -95,22 +95,29 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
if (!is_async) {
return;
}
+ if (syncpoint_id == UINT32_MAX) {
+ // TODO: Research what this does.
+ LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
+ return;
+ }
MICROPROFILE_SCOPE(GPU_wait);
std::unique_lock lock{sync_mutex};
- sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; });
+ sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; });
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
- syncpoints[syncpoint_id]++;
+ auto& syncpoint = syncpoints.at(syncpoint_id);
+ syncpoint++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
- if (!syncpt_interrupts[syncpoint_id].empty()) {
- u32 value = syncpoints[syncpoint_id].load();
- auto it = syncpt_interrupts[syncpoint_id].begin();
- while (it != syncpt_interrupts[syncpoint_id].end()) {
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
+ if (!interrupt.empty()) {
+ u32 value = syncpoint.load();
+ auto it = interrupt.begin();
+ while (it != interrupt.end()) {
if (value >= *it) {
TriggerCpuInterrupt(syncpoint_id, *it);
- it = syncpt_interrupts[syncpoint_id].erase(it);
+ it = interrupt.erase(it);
continue;
}
it++;
@@ -119,22 +126,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
}
u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
- return syncpoints[syncpoint_id].load();
+ return syncpoints.at(syncpoint_id).load();
}
void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
- auto& interrupt = syncpt_interrupts[syncpoint_id];
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
bool contains = std::any_of(interrupt.begin(), interrupt.end(),
[value](u32 in_value) { return in_value == value; });
if (contains) {
return;
}
- syncpt_interrupts[syncpoint_id].emplace_back(value);
+ interrupt.emplace_back(value);
}
bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
std::lock_guard lock{sync_mutex};
- auto& interrupt = syncpt_interrupts[syncpoint_id];
+ auto& interrupt = syncpt_interrupts.at(syncpoint_id);
const auto iter =
std::find_if(interrupt.begin(), interrupt.end(),
[value](u32 interrupt_value) { return value == interrupt_value; });
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 36bf92808..cfddbde5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1156,7 +1156,7 @@ void RasterizerOpenGL::SyncViewport() {
flags[Dirty::ClipControl] = false;
bool flip_y = false;
- if (regs.viewport_transform[0].scale_y < 0.0) {
+ if (regs.viewport_transform[0].scale_y < 0.0f) {
flip_y = !flip_y;
}
if (regs.screen_y_control.y_negate != 0) {
@@ -1579,10 +1579,6 @@ void RasterizerOpenGL::SyncAlphaTest() {
flags[Dirty::AlphaTest] = false;
const auto& regs = maxwell3d.regs;
- if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
- LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
- }
-
if (regs.alpha_test_enabled) {
glEnable(GL_ALPHA_TEST);
glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2ccca1993..c869bb0e2 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -151,8 +151,8 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
rasterizer->TickFrame();
- render_window.PollEvents();
context->SwapBuffers();
+ render_window.OnFrameDisplayed();
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index da5c550ea..5ec43db11 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -8,6 +8,7 @@
#include <boost/functional/hash.hpp>
+#include "common/bit_cast.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@@ -45,7 +46,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
regs.polygon_offset_fill_enable};
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
- raw = 0;
+ raw1 = 0;
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@@ -60,7 +61,14 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
topology.Assign(regs.draw.topology);
- std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
+ raw2 = 0;
+ const auto test_func =
+ regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
+ alpha_test_func.Assign(PackComparisonOp(test_func));
+ early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
+
+ alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
+ point_size = Common::BitCast<u32>(regs.point_size);
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
binding_divisors[index] =
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 2c18eeaae..c26b77790 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -171,7 +171,7 @@ struct FixedPipelineState {
};
union {
- u32 raw;
+ u32 raw1;
BitField<0, 1, u32> no_extended_dynamic_state;
BitField<2, 1, u32> primitive_restart_enable;
BitField<3, 1, u32> depth_bias_enable;
@@ -187,6 +187,13 @@ struct FixedPipelineState {
BitField<23, 1, u32> rasterize_enable;
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
};
+ union {
+ u32 raw2;
+ BitField<0, 3, u32> alpha_test_func;
+ BitField<3, 1, u32> early_z;
+ };
+
+ u32 alpha_test_ref;
u32 point_size;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index f2610868e..a2173edd2 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -252,8 +252,6 @@ RendererVulkan::~RendererVulkan() {
}
void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- render_window.PollEvents();
-
if (!framebuffer) {
return;
}
@@ -283,7 +281,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
rasterizer->TickFrame();
}
- render_window.PollEvents();
+ render_window.OnFrameDisplayed();
}
bool RendererVulkan::Init() {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index dedc9c466..df7e8c864 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -7,6 +7,7 @@
#include <memory>
#include <vector>
+#include "common/bit_cast.h"
#include "common/microprofile.h"
#include "core/core.h"
#include "core/memory.h"
@@ -343,6 +344,12 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
specialization.attribute_types[i] = attribute.Type();
}
specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
+ specialization.early_fragment_tests = fixed_state.early_z;
+
+ // Alpha test
+ specialization.alpha_test_func =
+ FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
+ specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
SPIRVProgram program;
std::vector<VkDescriptorSetLayoutBinding> bindings;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a20452b87..fed9ebecd 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -315,7 +315,6 @@ public:
"supported on this device");
}
}
-
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
if (ir.UsesViewportIndex()) {
AddCapability(spv::Capability::MultiViewport);
@@ -325,11 +324,9 @@ public:
AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
}
}
-
if (device.IsFormatlessImageLoadSupported()) {
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
}
-
if (device.IsFloat16Supported()) {
AddCapability(spv::Capability::Float16);
}
@@ -377,6 +374,9 @@ public:
if (header.ps.omap.depth) {
AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
}
+ if (specialization.early_fragment_tests) {
+ AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
+ }
break;
case ShaderType::Compute:
const auto workgroup_size = specialization.workgroup_size;
@@ -2075,6 +2075,45 @@ private:
return {};
}
+ Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) {
+ using Compare = Maxwell::ComparisonOp;
+ switch (compare_op) {
+ case Compare::NeverOld:
+ return v_false; // Never let the test pass
+ case Compare::LessOld:
+ return OpFOrdLessThan(t_bool, operand_1, operand_2);
+ case Compare::EqualOld:
+ return OpFOrdEqual(t_bool, operand_1, operand_2);
+ case Compare::LessEqualOld:
+ return OpFOrdLessThanEqual(t_bool, operand_1, operand_2);
+ case Compare::GreaterOld:
+ return OpFOrdGreaterThan(t_bool, operand_1, operand_2);
+ case Compare::NotEqualOld:
+ return OpFOrdNotEqual(t_bool, operand_1, operand_2);
+ case Compare::GreaterEqualOld:
+ return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
+ default:
+ UNREACHABLE();
+ }
+ }
+
+ void AlphaTest(Id pointer) {
+ if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) {
+ return;
+ }
+ const Id true_label = OpLabel();
+ const Id discard_label = OpLabel();
+ const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref);
+ const Id alpha_value = OpLoad(t_float, pointer);
+ const Id condition =
+ MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference);
+
+ OpBranchConditional(condition, true_label, discard_label);
+ AddLabel(discard_label);
+ OpKill();
+ AddLabel(true_label);
+ }
+
void PreExit() {
if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
const u32 position_index = out_indices.position.value();
@@ -2097,8 +2136,6 @@ private:
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
"Sample mask write is unimplemented");
- // TODO(Rodrigo): Alpha testing
-
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
u32 current_reg = 0;
@@ -2110,6 +2147,9 @@ private:
}
const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
OpStore(pointer, SafeGetRegister(current_reg));
+ if (rt == 0 && component == 3) {
+ AlphaTest(pointer);
+ }
++current_reg;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 2b0e90396..110848922 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -95,6 +95,9 @@ struct Specialization final {
std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
bool ndc_minus_one_to_one{};
+ bool early_fragment_tests{};
+ float alpha_test_ref{};
+ Maxwell::ComparisonOp alpha_test_func{};
};
// Old gcc versions don't consider this trivially copyable.
// static_assert(std::is_trivially_copyable_v<Specialization>);
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 39cc3b869..6920afdf2 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -43,8 +43,8 @@ void AsyncShaders::AllocateWorkers() {
// Create workers
for (std::size_t i = 0; i < num_workers; i++) {
context_list.push_back(emu_window.CreateSharedContext());
- worker_threads.push_back(
- std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()));
+ worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
+ context_list[i].get());
}
}
@@ -106,8 +106,7 @@ std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
std::vector<Result> results;
{
std::unique_lock lock{completed_mutex};
- results.assign(std::make_move_iterator(finished_work.begin()),
- std::make_move_iterator(finished_work.end()));
+ results = std::move(finished_work);
finished_work.clear();
}
return results;
@@ -116,11 +115,10 @@ std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
Tegra::Engines::ShaderType shader_type, u64 uid,
std::vector<u64> code, std::vector<u64> code_b,
- u32 main_offset,
- VideoCommon::Shader::CompilerSettings compiler_settings,
- const VideoCommon::Shader::Registry& registry,
- VAddr cpu_addr) {
- WorkerParams params{
+ u32 main_offset, CompilerSettings compiler_settings,
+ const Registry& registry, VAddr cpu_addr) {
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push({
.backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
.device = &device,
.shader_type = shader_type,
@@ -131,9 +129,7 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
.compiler_settings = compiler_settings,
.registry = registry,
.cpu_address = cpu_addr,
- };
- std::unique_lock lock(queue_mutex);
- pending_queue.push(std::move(params));
+ });
cv.notify_one();
}
@@ -145,7 +141,8 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
std::vector<VkDescriptorSetLayoutBinding> bindings,
Vulkan::SPIRVProgram program,
Vulkan::GraphicsPipelineCacheKey key) {
- WorkerParams params{
+ std::unique_lock lock(queue_mutex);
+ pending_queue.push({
.backend = Backend::Vulkan,
.pp_cache = pp_cache,
.vk_device = &device,
@@ -153,13 +150,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
.descriptor_pool = &descriptor_pool,
.update_descriptor_queue = &update_descriptor_queue,
.renderpass_cache = &renderpass_cache,
- .bindings = bindings,
- .program = program,
+ .bindings = std::move(bindings),
+ .program = std::move(program),
.key = key,
- };
-
- std::unique_lock lock(queue_mutex);
- pending_queue.push(std::move(params));
+ });
cv.notify_one();
}
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 618d309d2..1ed4212ee 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -212,10 +212,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return 0;
case TextureFormat::R8G24:
if (component == 0) {
- return 8;
+ return 24;
}
if (component == 1) {
- return 24;
+ return 8;
}
return 0;
case TextureFormat::R8G8: