diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/command_classes/codecs/codec.h | 1 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 7 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 184 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 29 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 12 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/fixed_pipeline_state.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/renderer_vulkan.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 50 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.h | 3 | ||||
-rw-r--r-- | src/video_core/shader/async_shaders.cpp | 32 | ||||
-rw-r--r-- | src/video_core/shader/decode/image.cpp | 4 |
14 files changed, 212 insertions, 138 deletions
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 5bbe6a332..ee5d62540 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -10,6 +10,7 @@ extern "C" { #if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #endif #include <libavcodec/avcodec.h> diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 1cbe8fe67..b0d9559d0 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -755,7 +755,11 @@ public: u32 data_upload; - INSERT_UNION_PADDING_WORDS(0x44); + INSERT_UNION_PADDING_WORDS(0x16); + + u32 force_early_fragment_tests; + + INSERT_UNION_PADDING_WORDS(0x2D); struct { union { @@ -1572,6 +1576,7 @@ ASSERT_REG_POSITION(shadow_ram_control, 0x49); ASSERT_REG_POSITION(upload, 0x60); ASSERT_REG_POSITION(exec_upload, 0x6C); ASSERT_REG_POSITION(data_upload, 0x6D); +ASSERT_REG_POSITION(force_early_fragment_tests, 0x84); ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(tess_mode, 0xC8); ASSERT_REG_POSITION(tess_level_outer, 0xC9); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index a3c05d1b0..37d17efdc 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -32,31 +32,31 @@ struct Register { constexpr Register() = default; - constexpr Register(u64 value) : value(value) {} + constexpr Register(u64 value_) : value(value_) {} - constexpr operator u64() const { + [[nodiscard]] constexpr operator u64() const { return value; } template <typename T> - constexpr u64 operator-(const T& oth) const { + [[nodiscard]] constexpr u64 operator-(const T& oth) const { return value - oth; } template <typename T> - constexpr u64 operator&(const T& oth) const { + [[nodiscard]] constexpr u64 operator&(const T& oth) const { return value & oth; } - constexpr u64 operator&(const Register& oth) const { + [[nodiscard]] constexpr u64 operator&(const Register& oth) const { return value & oth.value; } - constexpr u64 operator~() const { + [[nodiscard]] constexpr u64 operator~() const { return ~value; } - u64 GetSwizzledIndex(u64 elem) const { + [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const { elem = (value + elem) & 3; return (value & ~3) + elem; } @@ -75,7 +75,7 @@ enum class AttributeSize : u64 { union Attribute { Attribute() = default; - constexpr explicit Attribute(u64 value) : value(value) {} + constexpr explicit Attribute(u64 value_) : value(value_) {} enum class Index : u64 { LayerViewportPointSize = 6, @@ -107,7 +107,7 @@ union Attribute { BitField<31, 1, u64> patch; BitField<47, 3, AttributeSize> size; - bool IsPhysical() const { + [[nodiscard]] bool IsPhysical() const { return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0; } } fmt20; @@ -124,7 +124,7 @@ union Attribute { union Sampler { Sampler() = default; - constexpr explicit Sampler(u64 value) : value(value) {} + constexpr explicit Sampler(u64 value_) : value(value_) {} enum class Index : u64 { Sampler_0 = 8, @@ -137,7 +137,7 @@ union Sampler { union Image { Image() = default; - constexpr explicit Image(u64 value) : value{value} {} + constexpr explicit Image(u64 value_) : value{value_} {} BitField<36, 13, u64> index; u64 value; @@ -505,14 +505,14 @@ struct IpaMode { IpaInterpMode interpolation_mode; IpaSampleMode sampling_mode; - bool operator==(const IpaMode& a) const { + [[nodiscard]] bool operator==(const IpaMode& a) const { return std::tie(interpolation_mode, sampling_mode) == std::tie(a.interpolation_mode, a.sampling_mode); } - bool operator!=(const IpaMode& a) const { + [[nodiscard]] bool operator!=(const IpaMode& a) const { return !operator==(a); } - bool operator<(const IpaMode& a) const { + [[nodiscard]] bool operator<(const IpaMode& a) const { return std::tie(interpolation_mode, sampling_mode) < std::tie(a.interpolation_mode, a.sampling_mode); } @@ -658,10 +658,10 @@ union Instruction { return *this; } - constexpr Instruction(u64 value) : value{value} {} + constexpr Instruction(u64 value_) : value{value_} {} constexpr Instruction(const Instruction& instr) : value(instr.value) {} - constexpr bool Bit(u64 offset) const { + [[nodiscard]] constexpr bool Bit(u64 offset) const { return ((value >> offset) & 1) != 0; } @@ -746,34 +746,34 @@ union Instruction { BitField<28, 8, u64> imm_lut28; BitField<48, 8, u64> imm_lut48; - u32 GetImmLut28() const { + [[nodiscard]] u32 GetImmLut28() const { return static_cast<u32>(imm_lut28); } - u32 GetImmLut48() const { + [[nodiscard]] u32 GetImmLut48() const { return static_cast<u32>(imm_lut48); } } lop3; - u16 GetImm20_16() const { + [[nodiscard]] u16 GetImm20_16() const { return static_cast<u16>(imm20_16); } - u32 GetImm20_19() const { + [[nodiscard]] u32 GetImm20_19() const { u32 imm{static_cast<u32>(imm20_19)}; imm <<= 12; imm |= negate_imm ? 0x80000000 : 0; return imm; } - u32 GetImm20_32() const { + [[nodiscard]] u32 GetImm20_32() const { return static_cast<u32>(imm20_32); } - s32 GetSignedImm20_20() const { - u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); + [[nodiscard]] s32 GetSignedImm20_20() const { + const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19)); // Sign extend the 20-bit value. - u32 mask = 1U << (20 - 1); + const auto mask = 1U << (20 - 1); return static_cast<s32>((immediate ^ mask) - mask); } } alu; @@ -857,7 +857,7 @@ union Instruction { BitField<56, 1, u64> second_negate; BitField<30, 9, u64> second; - u32 PackImmediates() const { + [[nodiscard]] u32 PackImmediates() const { // Immediates are half floats shifted. constexpr u32 imm_shift = 6; return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift))); @@ -1033,7 +1033,7 @@ union Instruction { BitField<28, 2, AtomicType> type; BitField<30, 22, s64> offset; - s32 GetImmediateOffset() const { + [[nodiscard]] s32 GetImmediateOffset() const { return static_cast<s32>(offset << 2); } } atoms; @@ -1215,7 +1215,7 @@ union Instruction { BitField<39, 4, u64> rounding; // H0, H1 extract for F16 missing BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value - F2fRoundingOp GetRoundingMode() const { + [[nodiscard]] F2fRoundingOp GetRoundingMode() const { constexpr u64 rounding_mask = 0x0B; return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); } @@ -1239,15 +1239,15 @@ union Instruction { BitField<54, 1, u64> aoffi_flag; BitField<55, 3, TextureProcessMode> process_mode; - bool IsComponentEnabled(std::size_t component) const { - return ((1ull << component) & component_mask) != 0; + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1271,15 +1271,15 @@ union Instruction { BitField<36, 1, u64> aoffi_flag; BitField<37, 3, TextureProcessMode> process_mode; - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { return ((1ULL << component) & component_mask) != 0; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1299,7 +1299,7 @@ union Instruction { BitField<31, 4, u64> component_mask; BitField<49, 1, u64> nodep_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NODEP: return nodep_flag != 0; @@ -1309,7 +1309,7 @@ union Instruction { return false; } - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { return ((1ULL << component) & component_mask) != 0; } } txq; @@ -1321,11 +1321,11 @@ union Instruction { BitField<35, 1, u64> ndv_flag; BitField<49, 1, u64> nodep_flag; - bool IsComponentEnabled(std::size_t component) const { - return ((1ull << component) & component_mask) != 0; + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return (ndv_flag != 0); @@ -1347,7 +1347,7 @@ union Instruction { BitField<54, 2, u64> offset_mode; BitField<56, 2, u64> component; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return ndv_flag != 0; @@ -1373,7 +1373,7 @@ union Instruction { BitField<33, 2, u64> offset_mode; BitField<37, 2, u64> component; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::NDV: return ndv_flag != 0; @@ -1399,7 +1399,7 @@ union Instruction { BitField<52, 2, u64> component; BitField<55, 1, u64> fp16_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return dc_flag != 0; @@ -1422,16 +1422,20 @@ union Instruction { BitField<53, 4, u64> texture_info; BitField<59, 1, u64> fp32_flag; - TextureType GetTextureType() const { + [[nodiscard]] TextureType GetTextureType() const { // The TEXS instruction has a weird encoding for the texture type. - if (texture_info == 0) + if (texture_info == 0) { return TextureType::Texture1D; - if (texture_info >= 1 && texture_info <= 9) + } + if (texture_info >= 1 && texture_info <= 9) { return TextureType::Texture2D; - if (texture_info >= 10 && texture_info <= 11) + } + if (texture_info >= 10 && texture_info <= 11) { return TextureType::Texture3D; - if (texture_info >= 12 && texture_info <= 13) + } + if (texture_info >= 12 && texture_info <= 13) { return TextureType::TextureCube; + } LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", static_cast<u32>(texture_info.Value())); @@ -1439,7 +1443,7 @@ union Instruction { return TextureType::Texture1D; } - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { switch (texture_info) { case 0: case 2: @@ -1458,7 +1462,7 @@ union Instruction { return TextureProcessMode::None; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::DC: return (texture_info >= 4 && texture_info <= 6) || texture_info == 9; @@ -1470,16 +1474,16 @@ union Instruction { return false; } - bool IsArrayTexture() const { + [[nodiscard]] bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. return texture_info >= 7 && texture_info <= 9; } - bool HasTwoDestinations() const { + [[nodiscard]] bool HasTwoDestinations() const { return gpr28.Value() != Register::ZeroIndex; } - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ {}, {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, @@ -1506,7 +1510,7 @@ union Instruction { BitField<54, 1, u64> cl; BitField<55, 1, u64> process_mode; - TextureProcessMode GetTextureProcessMode() const { + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; } } tld; @@ -1516,7 +1520,7 @@ union Instruction { BitField<53, 4, u64> texture_info; BitField<59, 1, u64> fp32_flag; - TextureType GetTextureType() const { + [[nodiscard]] TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. if (texture_info <= 1) { return TextureType::Texture1D; @@ -1535,13 +1539,14 @@ union Instruction { return TextureType::Texture1D; } - TextureProcessMode GetTextureProcessMode() const { - if (texture_info == 1 || texture_info == 5 || texture_info == 12) + [[nodiscard]] TextureProcessMode GetTextureProcessMode() const { + if (texture_info == 1 || texture_info == 5 || texture_info == 12) { return TextureProcessMode::LL; + } return TextureProcessMode::LZ; } - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::AOFFI: return texture_info == 12 || texture_info == 4; @@ -1555,7 +1560,7 @@ union Instruction { return false; } - bool IsArrayTexture() const { + [[nodiscard]] bool IsArrayTexture() const { // TEXS only supports Texture2D arrays. return texture_info == 8; } @@ -1567,7 +1572,7 @@ union Instruction { BitField<35, 1, u64> aoffi_flag; BitField<49, 1, u64> nodep_flag; - bool UsesMiscMode(TextureMiscMode mode) const { + [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const { switch (mode) { case TextureMiscMode::AOFFI: return aoffi_flag != 0; @@ -1591,7 +1596,7 @@ union Instruction { BitField<20, 3, StoreType> store_data_layout; BitField<20, 4, u64> component_mask_selector; - bool IsComponentEnabled(std::size_t component) const { + [[nodiscard]] bool IsComponentEnabled(std::size_t component) const { ASSERT(mode == SurfaceDataMode::P); constexpr u8 R = 0b0001; constexpr u8 G = 0b0010; @@ -1604,7 +1609,7 @@ union Instruction { return std::bitset<4>{mask.at(component_mask_selector)}.test(component); } - StoreType GetStoreDataLayout() const { + [[nodiscard]] StoreType GetStoreDataLayout() const { ASSERT(mode == SurfaceDataMode::D_BA); return store_data_layout; } @@ -1622,14 +1627,15 @@ union Instruction { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; - s32 GetBranchTarget() const { + [[nodiscard]] s32 GetBranchTarget() const { // Sign extend the branch target offset - u32 mask = 1U << (24 - 1); - u32 value = static_cast<u32>(target); + const auto mask = 1U << (24 - 1); + const auto target_value = static_cast<u32>(target); + constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); + // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + - 1; + return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; } } bra; @@ -1637,14 +1643,15 @@ union Instruction { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; - s32 GetBranchExtend() const { + [[nodiscard]] s32 GetBranchExtend() const { // Sign extend the branch target offset - u32 mask = 1U << (24 - 1); - u32 value = static_cast<u32>(target); + const auto mask = 1U << (24 - 1); + const auto target_value = static_cast<u32>(target); + constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction)); + // The branch offset is relative to the next instruction and is stored in bytes, so // divide it by the size of an instruction and add 1 to it. - return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + - 1; + return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1; } } brx; @@ -1697,7 +1704,7 @@ union Instruction { BitField<50, 1, u64> is_op_b_register; BitField<51, 3, VmnmxOperation> operation; - VmnmxType SourceFormatA() const { + [[nodiscard]] VmnmxType SourceFormatA() const { switch (src_format_a) { case 0b11: return VmnmxType::Bits32; @@ -1708,7 +1715,7 @@ union Instruction { } } - VmnmxType SourceFormatB() const { + [[nodiscard]] VmnmxType SourceFormatB() const { switch (src_format_b) { case 0b11: return VmnmxType::Bits32; @@ -1739,7 +1746,7 @@ union Instruction { BitField<20, 14, u64> shifted_offset; BitField<34, 5, u64> index; - u64 GetOffset() const { + [[nodiscard]] u64 GetOffset() const { return shifted_offset * 4; } } cbuf34; @@ -1748,7 +1755,7 @@ union Instruction { BitField<20, 16, s64> offset; BitField<36, 5, u64> index; - s64 GetOffset() const { + [[nodiscard]] s64 GetOffset() const { return offset; } } cbuf36; @@ -1997,29 +2004,29 @@ public: /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be /// conditionally executed). - static bool IsPredicatedInstruction(Id opcode) { + [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) { // TODO(Subv): Add the rest of unpredicated instructions. return opcode != Id::SSY && opcode != Id::PBK; } class Matcher { public: - constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) - : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} + constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_) + : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {} - constexpr const char* GetName() const { + [[nodiscard]] constexpr const char* GetName() const { return name; } - constexpr u16 GetMask() const { + [[nodiscard]] constexpr u16 GetMask() const { return mask; } - constexpr Id GetId() const { + [[nodiscard]] constexpr Id GetId() const { return id; } - constexpr Type GetType() const { + [[nodiscard]] constexpr Type GetType() const { return type; } @@ -2028,7 +2035,7 @@ public: * @param instruction The instruction to test * @returns true if the given instruction matches. */ - constexpr bool Matches(u16 instruction) const { + [[nodiscard]] constexpr bool Matches(u16 instruction) const { return (instruction & mask) == expected; } @@ -2040,7 +2047,8 @@ public: Type type; }; - static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) { + using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>; + [[nodiscard]] static DecodeResult Decode(Instruction instr) { static const auto table{GetDecodeTable()}; const auto matches_instruction = [instr](const auto& matcher) { @@ -2062,7 +2070,7 @@ private: * A '0' in a bitstring indicates that a zero must be present at that bit position. * A '1' in a bitstring indicates that a one must be present at that bit position. */ - static constexpr auto GetMaskAndExpect(const char* const bitstring) { + [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) { u16 mask = 0, expect = 0; for (std::size_t i = 0; i < opcode_bitsize; i++) { const std::size_t bit_position = opcode_bitsize - i - 1; @@ -2084,14 +2092,14 @@ private: public: /// Creates a matcher that can match and parse instructions based on bitstring. - static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, - const char* const name) { + [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op, + Type type, const char* const name) { const auto [mask, expected] = GetMaskAndExpect(bitstring); return Matcher(name, mask, expected, op, type); } }; - static std::vector<Matcher> GetDecodeTable() { + [[nodiscard]] static std::vector<Matcher> GetDecodeTable() { std::vector<Matcher> table = { #define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name) INST("111000110011----", Id::KIL, Type::Flow, "KIL"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ebd149c3a..e91f52938 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -95,22 +95,29 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) { if (!is_async) { return; } + if (syncpoint_id == UINT32_MAX) { + // TODO: Research what this does. + LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); + return; + } MICROPROFILE_SCOPE(GPU_wait); std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; }); + sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; }); } void GPU::IncrementSyncPoint(const u32 syncpoint_id) { - syncpoints[syncpoint_id]++; + auto& syncpoint = syncpoints.at(syncpoint_id); + syncpoint++; std::lock_guard lock{sync_mutex}; sync_cv.notify_all(); - if (!syncpt_interrupts[syncpoint_id].empty()) { - u32 value = syncpoints[syncpoint_id].load(); - auto it = syncpt_interrupts[syncpoint_id].begin(); - while (it != syncpt_interrupts[syncpoint_id].end()) { + auto& interrupt = syncpt_interrupts.at(syncpoint_id); + if (!interrupt.empty()) { + u32 value = syncpoint.load(); + auto it = interrupt.begin(); + while (it != interrupt.end()) { if (value >= *it) { TriggerCpuInterrupt(syncpoint_id, *it); - it = syncpt_interrupts[syncpoint_id].erase(it); + it = interrupt.erase(it); continue; } it++; @@ -119,22 +126,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) { } u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { - return syncpoints[syncpoint_id].load(); + return syncpoints.at(syncpoint_id).load(); } void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { - auto& interrupt = syncpt_interrupts[syncpoint_id]; + auto& interrupt = syncpt_interrupts.at(syncpoint_id); bool contains = std::any_of(interrupt.begin(), interrupt.end(), [value](u32 in_value) { return in_value == value; }); if (contains) { return; } - syncpt_interrupts[syncpoint_id].emplace_back(value); + interrupt.emplace_back(value); } bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { std::lock_guard lock{sync_mutex}; - auto& interrupt = syncpt_interrupts[syncpoint_id]; + auto& interrupt = syncpt_interrupts.at(syncpoint_id); const auto iter = std::find_if(interrupt.begin(), interrupt.end(), [value](u32 interrupt_value) { return value == interrupt_value; }); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 36bf92808..cfddbde5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1156,7 +1156,7 @@ void RasterizerOpenGL::SyncViewport() { flags[Dirty::ClipControl] = false; bool flip_y = false; - if (regs.viewport_transform[0].scale_y < 0.0) { + if (regs.viewport_transform[0].scale_y < 0.0f) { flip_y = !flip_y; } if (regs.screen_y_control.y_negate != 0) { @@ -1579,10 +1579,6 @@ void RasterizerOpenGL::SyncAlphaTest() { flags[Dirty::AlphaTest] = false; const auto& regs = maxwell3d.regs; - if (regs.alpha_test_enabled && regs.rt_control.count > 1) { - LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); - } - if (regs.alpha_test_enabled) { glEnable(GL_ALPHA_TEST); glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 2ccca1993..c869bb0e2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -151,8 +151,8 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer->TickFrame(); - render_window.PollEvents(); context->SwapBuffers(); + render_window.OnFrameDisplayed(); } void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index da5c550ea..5ec43db11 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -8,6 +8,7 @@ #include <boost/functional/hash.hpp> +#include "common/bit_cast.h" #include "common/cityhash.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" @@ -45,7 +46,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta regs.polygon_offset_fill_enable}; const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); - raw = 0; + raw1 = 0; primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); @@ -60,7 +61,14 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); topology.Assign(regs.draw.topology); - std::memcpy(&point_size, ®s.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast + raw2 = 0; + const auto test_func = + regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; + alpha_test_func.Assign(PackComparisonOp(test_func)); + early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); + + alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); + point_size = Common::BitCast<u32>(regs.point_size); for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { binding_divisors[index] = diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 2c18eeaae..c26b77790 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -171,7 +171,7 @@ struct FixedPipelineState { }; union { - u32 raw; + u32 raw1; BitField<0, 1, u32> no_extended_dynamic_state; BitField<2, 1, u32> primitive_restart_enable; BitField<3, 1, u32> depth_bias_enable; @@ -187,6 +187,13 @@ struct FixedPipelineState { BitField<23, 1, u32> rasterize_enable; BitField<24, 4, Maxwell::PrimitiveTopology> topology; }; + union { + u32 raw2; + BitField<0, 3, u32> alpha_test_func; + BitField<3, 1, u32> early_z; + }; + + u32 alpha_test_ref; u32 point_size; std::array<u32, Maxwell::NumVertexArrays> binding_divisors; std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index f2610868e..a2173edd2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -252,8 +252,6 @@ RendererVulkan::~RendererVulkan() { } void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - render_window.PollEvents(); - if (!framebuffer) { return; } @@ -283,7 +281,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { rasterizer->TickFrame(); } - render_window.PollEvents(); + render_window.OnFrameDisplayed(); } bool RendererVulkan::Init() { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index dedc9c466..df7e8c864 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -7,6 +7,7 @@ #include <memory> #include <vector> +#include "common/bit_cast.h" #include "common/microprofile.h" #include "core/core.h" #include "core/memory.h" @@ -343,6 +344,12 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { specialization.attribute_types[i] = attribute.Type(); } specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; + specialization.early_fragment_tests = fixed_state.early_z; + + // Alpha test + specialization.alpha_test_func = + FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); + specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); SPIRVProgram program; std::vector<VkDescriptorSetLayoutBinding> bindings; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a20452b87..fed9ebecd 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -315,7 +315,6 @@ public: "supported on this device"); } } - if (ir.UsesLayer() || ir.UsesViewportIndex()) { if (ir.UsesViewportIndex()) { AddCapability(spv::Capability::MultiViewport); @@ -325,11 +324,9 @@ public: AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } - if (device.IsFormatlessImageLoadSupported()) { AddCapability(spv::Capability::StorageImageReadWithoutFormat); } - if (device.IsFloat16Supported()) { AddCapability(spv::Capability::Float16); } @@ -377,6 +374,9 @@ public: if (header.ps.omap.depth) { AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); } + if (specialization.early_fragment_tests) { + AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); + } break; case ShaderType::Compute: const auto workgroup_size = specialization.workgroup_size; @@ -2075,6 +2075,45 @@ private: return {}; } + Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { + using Compare = Maxwell::ComparisonOp; + switch (compare_op) { + case Compare::NeverOld: + return v_false; // Never let the test pass + case Compare::LessOld: + return OpFOrdLessThan(t_bool, operand_1, operand_2); + case Compare::EqualOld: + return OpFOrdEqual(t_bool, operand_1, operand_2); + case Compare::LessEqualOld: + return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); + case Compare::GreaterOld: + return OpFOrdGreaterThan(t_bool, operand_1, operand_2); + case Compare::NotEqualOld: + return OpFOrdNotEqual(t_bool, operand_1, operand_2); + case Compare::GreaterEqualOld: + return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); + default: + UNREACHABLE(); + } + } + + void AlphaTest(Id pointer) { + if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { + return; + } + const Id true_label = OpLabel(); + const Id discard_label = OpLabel(); + const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); + const Id alpha_value = OpLoad(t_float, pointer); + const Id condition = + MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); + + OpBranchConditional(condition, true_label, discard_label); + AddLabel(discard_label); + OpKill(); + AddLabel(true_label); + } + void PreExit() { if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { const u32 position_index = out_indices.position.value(); @@ -2097,8 +2136,6 @@ private: UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - // TODO(Rodrigo): Alpha testing - // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. u32 current_reg = 0; @@ -2110,6 +2147,9 @@ private: } const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); OpStore(pointer, SafeGetRegister(current_reg)); + if (rt == 0 && component == 3) { + AlphaTest(pointer); + } ++current_reg; } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 2b0e90396..110848922 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -95,6 +95,9 @@ struct Specialization final { std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; bool ndc_minus_one_to_one{}; + bool early_fragment_tests{}; + float alpha_test_ref{}; + Maxwell::ComparisonOp alpha_test_func{}; }; // Old gcc versions don't consider this trivially copyable. // static_assert(std::is_trivially_copyable_v<Specialization>); diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index 39cc3b869..6920afdf2 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -43,8 +43,8 @@ void AsyncShaders::AllocateWorkers() { // Create workers for (std::size_t i = 0; i < num_workers; i++) { context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.push_back( - std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())); + worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, + context_list[i].get()); } } @@ -106,8 +106,7 @@ std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { std::vector<Result> results; { std::unique_lock lock{completed_mutex}; - results.assign(std::make_move_iterator(finished_work.begin()), - std::make_move_iterator(finished_work.end())); + results = std::move(finished_work); finished_work.clear(); } return results; @@ -116,11 +115,10 @@ std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, u64 uid, std::vector<u64> code, std::vector<u64> code_b, - u32 main_offset, - VideoCommon::Shader::CompilerSettings compiler_settings, - const VideoCommon::Shader::Registry& registry, - VAddr cpu_addr) { - WorkerParams params{ + u32 main_offset, CompilerSettings compiler_settings, + const Registry& registry, VAddr cpu_addr) { + std::unique_lock lock(queue_mutex); + pending_queue.push({ .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, .device = &device, .shader_type = shader_type, @@ -131,9 +129,7 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, .compiler_settings = compiler_settings, .registry = registry, .cpu_address = cpu_addr, - }; - std::unique_lock lock(queue_mutex); - pending_queue.push(std::move(params)); + }); cv.notify_one(); } @@ -145,7 +141,8 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, std::vector<VkDescriptorSetLayoutBinding> bindings, Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key) { - WorkerParams params{ + std::unique_lock lock(queue_mutex); + pending_queue.push({ .backend = Backend::Vulkan, .pp_cache = pp_cache, .vk_device = &device, @@ -153,13 +150,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, .descriptor_pool = &descriptor_pool, .update_descriptor_queue = &update_descriptor_queue, .renderpass_cache = &renderpass_cache, - .bindings = bindings, - .program = program, + .bindings = std::move(bindings), + .program = std::move(program), .key = key, - }; - - std::unique_lock lock(queue_mutex); - pending_queue.push(std::move(params)); + }); cv.notify_one(); } diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 618d309d2..1ed4212ee 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -212,10 +212,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return 0; case TextureFormat::R8G24: if (component == 0) { - return 8; + return 24; } if (component == 1) { - return 24; + return 8; } return 0; case TextureFormat::R8G8: |