diff options
28 files changed, 1885 insertions, 645 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2ba1da195..d6eb9055b 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -32,6 +32,8 @@ add_library(common STATIC break_points.cpp break_points.h chunk_file.h + cityhash.cpp + cityhash.h code_block.h color.h common_funcs.h @@ -39,7 +41,6 @@ add_library(common STATIC common_types.h file_util.cpp file_util.h - hash.cpp hash.h linear_disk_cache.h logging/backend.cpp diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 0cc0a1be0..5638bdbba 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -115,7 +115,7 @@ private: // assignment would copy the full storage value, rather than just the bits // relevant to this particular bit field. // We don't delete it because we want BitField to be trivially copyable. - BitField& operator=(const BitField&) = default; + constexpr BitField& operator=(const BitField&) = default; // StorageType is T for non-enum types and the underlying type of T if // T is an enumeration. Note that T is wrapped within an enable_if in the @@ -166,20 +166,20 @@ public: // so that we can use this within unions constexpr BitField() = default; - FORCE_INLINE operator T() const { + constexpr FORCE_INLINE operator T() const { return Value(); } - FORCE_INLINE void Assign(const T& value) { + constexpr FORCE_INLINE void Assign(const T& value) { storage = (storage & ~mask) | FormatValue(value); } - FORCE_INLINE T Value() const { + constexpr T Value() const { return ExtractValue(storage); } // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 - FORCE_INLINE bool ToBool() const { + constexpr FORCE_INLINE bool ToBool() const { return Value() != 0; } diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp new file mode 100644 index 000000000..de31ffbd8 --- /dev/null +++ b/src/common/cityhash.cpp @@ -0,0 +1,340 @@ +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides CityHash64() and related functions. +// +// It's probably possible to create even faster hash functions by +// writing a program that systematically explores some of the space of +// possible hash functions, by using SIMD instructions, or by +// compromising on hash quality. + +#include <algorithm> +#include <string.h> // for memcpy and memset +#include "cityhash.h" +#include "common/swap.h" + +// #include "config.h" +#ifdef __GNUC__ +#define HAVE_BUILTIN_EXPECT 1 +#endif +#ifdef COMMON_BIG_ENDIAN +#define WORDS_BIGENDIAN 1 +#endif + +using namespace std; + +typedef uint8_t uint8; +typedef uint32_t uint32; +typedef uint64_t uint64; + +namespace Common { + +static uint64 UNALIGNED_LOAD64(const char* p) { + uint64 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +static uint32 UNALIGNED_LOAD32(const char* p) { + uint32 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +#ifdef WORDS_BIGENDIAN +#define uint32_in_expected_order(x) (swap32(x)) +#define uint64_in_expected_order(x) (swap64(x)) +#else +#define uint32_in_expected_order(x) (x) +#define uint64_in_expected_order(x) (x) +#endif + +#if !defined(LIKELY) +#if HAVE_BUILTIN_EXPECT +#define LIKELY(x) (__builtin_expect(!!(x), 1)) +#else +#define LIKELY(x) (x) +#endif +#endif + +static uint64 Fetch64(const char* p) { + return uint64_in_expected_order(UNALIGNED_LOAD64(p)); +} + +static uint32 Fetch32(const char* p) { + return uint32_in_expected_order(UNALIGNED_LOAD32(p)); +} + +// Some primes between 2^63 and 2^64 for various uses. +static const uint64 k0 = 0xc3a5c85c97cb3127ULL; +static const uint64 k1 = 0xb492b66fbe98f273ULL; +static const uint64 k2 = 0x9ae16a3b2f90404fULL; + +// Bitwise right rotate. Normally this will compile to a single +// instruction, especially if the shift is a manifest constant. +static uint64 Rotate(uint64 val, int shift) { + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); +} + +static uint64 ShiftMix(uint64 val) { + return val ^ (val >> 47); +} + +static uint64 HashLen16(uint64 u, uint64 v) { + return Hash128to64(uint128(u, v)); +} + +static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { + // Murmur-inspired hashing. + uint64 a = (u ^ v) * mul; + a ^= (a >> 47); + uint64 b = (v ^ a) * mul; + b ^= (b >> 47); + b *= mul; + return b; +} + +static uint64 HashLen0to16(const char* s, size_t len) { + if (len >= 8) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) + k2; + uint64 b = Fetch64(s + len - 8); + uint64 c = Rotate(b, 37) * mul + a; + uint64 d = (Rotate(a, 25) + b) * mul; + return HashLen16(c, d, mul); + } + if (len >= 4) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); + } + if (len > 0) { + uint8 a = s[0]; + uint8 b = s[len >> 1]; + uint8 c = s[len - 1]; + uint32 y = static_cast<uint32>(a) + (static_cast<uint32>(b) << 8); + uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(c) << 2); + return ShiftMix(y * k2 ^ z * k0) * k2; + } + return k2; +} + +// This probably works well for 16-byte strings as well, but it may be overkill +// in that case. +static uint64 HashLen17to32(const char* s, size_t len) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) * k1; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 8) * mul; + uint64 d = Fetch64(s + len - 16) * k2; + return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul); +} + +// Return a 16-byte hash for 48 bytes. Quick and dirty. +// Callers do best to use "random-looking" values for a and b. +static pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, + uint64 b) { + a += w; + b = Rotate(b + a + z, 21); + uint64 c = a; + a += x; + a += y; + b += Rotate(a, 44); + return make_pair(a + z, b + c); +} + +// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. +static pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { + return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, + b); +} + +// Return an 8-byte hash for 33 to 64 bytes. +static uint64 HashLen33to64(const char* s, size_t len) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) * k2; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 24); + uint64 d = Fetch64(s + len - 32); + uint64 e = Fetch64(s + 16) * k2; + uint64 f = Fetch64(s + 24) * 9; + uint64 g = Fetch64(s + len - 8); + uint64 h = Fetch64(s + len - 16) * mul; + uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; + uint64 v = ((a + g) ^ d) + f + 1; + uint64 w = swap64((u + v) * mul) + h; + uint64 x = Rotate(e + f, 42) + c; + uint64 y = (swap64((v + w) * mul) + g) * mul; + uint64 z = e + f + c; + a = swap64((x + z) * mul + y) + b; + b = ShiftMix((z + a) * mul + d + h) * mul; + return b + x; +} + +uint64 CityHash64(const char* s, size_t len) { + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); + } + + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. + uint64 x = Fetch64(s + len - 40); + uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); + uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); + pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z); + pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + x = x * k1 + Fetch64(s); + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. + len = (len - 1) & ~static_cast<size_t>(63); + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, + HashLen16(v.second, w.second) + x); +} + +uint64 CityHash64WithSeed(const char* s, size_t len, uint64 seed) { + return CityHash64WithSeeds(s, len, k2, seed); +} + +uint64 CityHash64WithSeeds(const char* s, size_t len, uint64 seed0, uint64 seed1) { + return HashLen16(CityHash64(s, len) - seed0, seed1); +} + +// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings +// of any length representable in signed long. Based on City and Murmur. +static uint128 CityMurmur(const char* s, size_t len, uint128 seed) { + uint64 a = Uint128Low64(seed); + uint64 b = Uint128High64(seed); + uint64 c = 0; + uint64 d = 0; + signed long l = static_cast<long>(len) - 16; + if (l <= 0) { // len <= 16 + a = ShiftMix(a * k1) * k1; + c = b * k1 + HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); + } else { // len > 16 + c = HashLen16(Fetch64(s + len - 8) + k1, a); + d = HashLen16(b + len, c + Fetch64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(Fetch64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return uint128(a ^ b, HashLen16(b, a)); +} + +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed) { + if (len < 128) { + return CityMurmur(s, len, seed); + } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + pair<uint64, uint64> v, w; + uint64 x = Uint128Low64(seed); + uint64 y = Uint128High64(seed); + uint64 z = len * k1; + v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); + v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); + w.first = Rotate(y + z, 35) * k1 + x; + w.second = Rotate(x + Fetch64(s + 88), 53) * k1; + + // This is the same inner loop as CityHash64(), manually unrolled. + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 128; + } while (LIKELY(len >= 128)); + x += Rotate(v.first + z, 49) * k0; + y = y * k0 + Rotate(w.second, 37); + z = z * k0 + Rotate(w.first, 27); + w.first *= 9; + v.first *= k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + for (size_t tail_done = 0; tail_done < len;) { + tail_done += 32; + y = Rotate(x + y, 42) * k0 + v.second; + w.first += Fetch64(s + len - tail_done + 16); + x = x * k0 + w.first; + z += w.second + Fetch64(s + len - tail_done); + w.second += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + v.first *= k0; + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. + x = HashLen16(x, v.first); + y = HashLen16(y + z, w.first); + return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); +} + +uint128 CityHash128(const char* s, size_t len) { + return len >= 16 + ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0)) + : CityHash128WithSeed(s, len, uint128(k0, k1)); +} + +} // namespace Common diff --git a/src/common/cityhash.h b/src/common/cityhash.h new file mode 100644 index 000000000..bcebdb150 --- /dev/null +++ b/src/common/cityhash.h @@ -0,0 +1,110 @@ +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// http://code.google.com/p/cityhash/ +// +// This file provides a few functions for hashing strings. All of them are +// high-quality functions in the sense that they pass standard tests such +// as Austin Appleby's SMHasher. They are also fast. +// +// For 64-bit x86 code, on short strings, we don't know of anything faster than +// CityHash64 that is of comparable quality. We believe our nearest competitor +// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash +// tables and most other hashing (excluding cryptography). +// +// For 64-bit x86 code, on long strings, the picture is more complicated. +// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., +// CityHashCrc128 appears to be faster than all competitors of comparable +// quality. CityHash128 is also good but not quite as fast. We believe our +// nearest competitor is Bob Jenkins' Spooky. We don't have great data for +// other 64-bit CPUs, but for long strings we know that Spooky is slightly +// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. +// Note that CityHashCrc128 is declared in citycrc.h. +// +// For 32-bit x86 code, we don't know of anything faster than CityHash32 that +// is of comparable quality. We believe our nearest competitor is Murmur3A. +// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) +// +// Functions in the CityHash family are not suitable for cryptography. +// +// Please see CityHash's README file for more details on our performance +// measurements and so on. +// +// WARNING: This code has been only lightly tested on big-endian platforms! +// It is known to work well on little-endian platforms that have a small penalty +// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. +// It should work on all 32-bit and 64-bit platforms that allow unaligned reads; +// bug reports are welcome. +// +// By the way, for some hash functions, given strings a and b, the hash +// of a+b is easily derived from the hashes of a and b. This property +// doesn't hold for any hash functions in this file. + +#pragma once + +#include <utility> +#include <stdint.h> +#include <stdlib.h> // for size_t. + +namespace Common { + +typedef std::pair<uint64_t, uint64_t> uint128; + +inline uint64_t Uint128Low64(const uint128& x) { + return x.first; +} +inline uint64_t Uint128High64(const uint128& x) { + return x.second; +} + +// Hash function for a byte array. +uint64_t CityHash64(const char* buf, size_t len); + +// Hash function for a byte array. For convenience, a 64-bit seed is also +// hashed into the result. +uint64_t CityHash64WithSeed(const char* buf, size_t len, uint64_t seed); + +// Hash function for a byte array. For convenience, two seeds are also +// hashed into the result. +uint64_t CityHash64WithSeeds(const char* buf, size_t len, uint64_t seed0, uint64_t seed1); + +// Hash function for a byte array. +uint128 CityHash128(const char* s, size_t len); + +// Hash function for a byte array. For convenience, a 128-bit seed is also +// hashed into the result. +uint128 CityHash128WithSeed(const char* s, size_t len, uint128 seed); + +// Hash 128 input bits down to 64 bits of output. +// This is intended to be a reasonably good hash function. +inline uint64_t Hash128to64(const uint128& x) { + // Murmur-inspired hashing. + const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + a ^= (a >> 47); + uint64_t b = (Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +} // namespace Common diff --git a/src/common/hash.cpp b/src/common/hash.cpp deleted file mode 100644 index a02e9e5b9..000000000 --- a/src/common/hash.cpp +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#if defined(_MSC_VER) -#include <stdlib.h> -#endif -#include "common/common_funcs.h" -#include "common/common_types.h" -#include "common/hash.h" - -namespace Common { - -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. - -// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do -// the conversion here -static FORCE_INLINE u64 getblock64(const u64* p, size_t i) { - return p[i]; -} - -// Finalization mix - force all bits of a hash block to avalanche -static FORCE_INLINE u64 fmix64(u64 k) { - k ^= k >> 33; - k *= 0xff51afd7ed558ccdllu; - k ^= k >> 33; - k *= 0xc4ceb9fe1a85ec53llu; - k ^= k >> 33; - - return k; -} - -// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit -// platforms (MurmurHash3_x64_128). It was taken from: -// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp -void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) { - const u8* data = (const u8*)key; - const size_t nblocks = len / 16; - - u64 h1 = seed; - u64 h2 = seed; - - const u64 c1 = 0x87c37b91114253d5llu; - const u64 c2 = 0x4cf5ad432745937fllu; - - // Body - - const u64* blocks = (const u64*)(data); - - for (size_t i = 0; i < nblocks; i++) { - u64 k1 = getblock64(blocks, i * 2 + 0); - u64 k2 = getblock64(blocks, i * 2 + 1); - - k1 *= c1; - k1 = _rotl64(k1, 31); - k1 *= c2; - h1 ^= k1; - - h1 = _rotl64(h1, 27); - h1 += h2; - h1 = h1 * 5 + 0x52dce729; - - k2 *= c2; - k2 = _rotl64(k2, 33); - k2 *= c1; - h2 ^= k2; - - h2 = _rotl64(h2, 31); - h2 += h1; - h2 = h2 * 5 + 0x38495ab5; - } - - // Tail - - const u8* tail = (const u8*)(data + nblocks * 16); - - u64 k1 = 0; - u64 k2 = 0; - - switch (len & 15) { - case 15: - k2 ^= ((u64)tail[14]) << 48; - case 14: - k2 ^= ((u64)tail[13]) << 40; - case 13: - k2 ^= ((u64)tail[12]) << 32; - case 12: - k2 ^= ((u64)tail[11]) << 24; - case 11: - k2 ^= ((u64)tail[10]) << 16; - case 10: - k2 ^= ((u64)tail[9]) << 8; - case 9: - k2 ^= ((u64)tail[8]) << 0; - k2 *= c2; - k2 = _rotl64(k2, 33); - k2 *= c1; - h2 ^= k2; - - case 8: - k1 ^= ((u64)tail[7]) << 56; - case 7: - k1 ^= ((u64)tail[6]) << 48; - case 6: - k1 ^= ((u64)tail[5]) << 40; - case 5: - k1 ^= ((u64)tail[4]) << 32; - case 4: - k1 ^= ((u64)tail[3]) << 24; - case 3: - k1 ^= ((u64)tail[2]) << 16; - case 2: - k1 ^= ((u64)tail[1]) << 8; - case 1: - k1 ^= ((u64)tail[0]) << 0; - k1 *= c1; - k1 = _rotl64(k1, 31); - k1 *= c2; - h1 ^= k1; - }; - - // Finalization - - h1 ^= len; - h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix64(h1); - h2 = fmix64(h2); - - h1 += h2; - h2 += h1; - - ((u64*)out)[0] = h1; - ((u64*)out)[1] = h2; -} - -} // namespace Common diff --git a/src/common/hash.h b/src/common/hash.h index ee2560dad..73c326980 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -5,12 +5,12 @@ #pragma once #include <cstddef> +#include <cstring> +#include "common/cityhash.h" #include "common/common_types.h" namespace Common { -void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); - /** * Computes a 64-bit hash over the specified block of data * @param data Block of data to compute hash over @@ -18,9 +18,54 @@ void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out); * @returns 64-bit hash value that was computed over the data block */ static inline u64 ComputeHash64(const void* data, size_t len) { - u64 res[2]; - MurmurHash3_128(data, len, 0, res); - return res[0]; + return CityHash64(static_cast<const char*>(data), len); +} + +/** + * Computes a 64-bit hash of a struct. In addition to being trivially copyable, it is also critical + * that either the struct includes no padding, or that any padding is initialized to a known value + * by memsetting the struct to 0 before filling it in. + */ +template <typename T> +static inline u64 ComputeStructHash64(const T& data) { + static_assert(std::is_trivially_copyable<T>(), + "Type passed to ComputeStructHash64 must be trivially copyable"); + return ComputeHash64(&data, sizeof(data)); } +/// A helper template that ensures the padding in a struct is initialized by memsetting to 0. +template <typename T> +struct HashableStruct { + // In addition to being trivially copyable, T must also have a trivial default constructor, + // because any member initialization would be overridden by memset + static_assert(std::is_trivial<T>(), "Type passed to HashableStruct must be trivial"); + /* + * We use a union because "implicitly-defined copy/move constructor for a union X copies the + * object representation of X." and "implicitly-defined copy assignment operator for a union X + * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy. + * This is important because the padding bytes are included in the hash and comparison between + * objects. + */ + union { + T state; + }; + + HashableStruct() { + // Memset structure to zero padding bits, so that they will be deterministic when hashing + std::memset(&state, 0, sizeof(T)); + } + + bool operator==(const HashableStruct<T>& o) const { + return std::memcmp(&state, &o.state, sizeof(T)) == 0; + }; + + bool operator!=(const HashableStruct<T>& o) const { + return !(*this == o); + }; + + size_t Hash() const { + return Common::ComputeStructHash64(state); + } +}; + } // namespace Common diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 48c45b1b4..458210a55 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp @@ -73,7 +73,7 @@ public: : ServiceFramework("IFile"), backend(std::move(backend)) { static const FunctionInfo functions[] = { {0, &IFile::Read, "Read"}, {1, &IFile::Write, "Write"}, - {2, nullptr, "Flush"}, {3, &IFile::SetSize, "SetSize"}, + {2, &IFile::Flush, "Flush"}, {3, &IFile::SetSize, "SetSize"}, {4, &IFile::GetSize, "GetSize"}, {5, nullptr, "OperateRange"}, }; RegisterHandlers(functions); @@ -152,6 +152,14 @@ private: rb.Push(RESULT_SUCCESS); } + void Flush(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_FS, "called"); + backend->Flush(); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + void SetSize(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const u64 size = rp.Pop<u64>(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a710c4bc5..281810357 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -9,6 +9,7 @@ add_library(video_core STATIC engines/maxwell_3d.h engines/maxwell_compute.cpp engines/maxwell_compute.h + engines/shader_bytecode.h gpu.cpp gpu.h macro_interpreter.cpp @@ -27,6 +28,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_decompiler.h renderer_opengl/gl_shader_gen.cpp renderer_opengl/gl_shader_gen.h + renderer_opengl/gl_shader_manager.cpp + renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state.cpp diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 98b39b2ff..9c6236c39 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -427,14 +427,11 @@ public: BitField<0, 1, u32> enable; BitField<4, 4, ShaderProgram> program; }; - u32 start_id; - INSERT_PADDING_WORDS(1); - u32 gpr_alloc; - ShaderStage type; - INSERT_PADDING_WORDS(9); + u32 offset; + INSERT_PADDING_WORDS(14); } shader_config[MaxShaderProgram]; - INSERT_PADDING_WORDS(0x8C); + INSERT_PADDING_WORDS(0x80); struct { u32 cb_size; @@ -507,6 +504,7 @@ public: }; State state{}; + MemoryManager& memory_manager; /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -521,8 +519,6 @@ public: std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; private: - MemoryManager& memory_manager; - std::unordered_map<u32, std::vector<u32>> uploaded_macros; /// Macro method that is currently being executed / being fed parameters. diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h new file mode 100644 index 000000000..eff0c35a1 --- /dev/null +++ b/src/video_core/engines/shader_bytecode.h @@ -0,0 +1,327 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <map> +#include <string> +#include "common/bit_field.h" + +namespace Tegra { +namespace Shader { + +struct Register { + Register() = default; + + constexpr Register(u64 value) : value(value) {} + + constexpr u64 GetIndex() const { + return value; + } + + constexpr operator u64() const { + return value; + } + + template <typename T> + constexpr u64 operator-(const T& oth) const { + return value - oth; + } + + template <typename T> + constexpr u64 operator&(const T& oth) const { + return value & oth; + } + + constexpr u64 operator&(const Register& oth) const { + return value & oth.value; + } + + constexpr u64 operator~() const { + return ~value; + } + +private: + u64 value; +}; + +union Attribute { + Attribute() = default; + + constexpr Attribute(u64 value) : value(value) {} + + enum class Index : u64 { + Position = 7, + Attribute_0 = 8, + }; + + union { + BitField<22, 2, u64> element; + BitField<24, 6, Index> index; + BitField<47, 3, u64> size; + } fmt20; + + union { + BitField<30, 2, u64> element; + BitField<32, 6, Index> index; + } fmt28; + + BitField<39, 8, u64> reg; + u64 value; +}; + +union Uniform { + BitField<20, 14, u64> offset; + BitField<34, 5, u64> index; +}; + +union OpCode { + enum class Id : u64 { + TEXS = 0x6C, + IPA = 0xE0, + FFMA_IMM = 0x65, + FFMA_CR = 0x93, + FFMA_RC = 0xA3, + FFMA_RR = 0xB3, + + FADD_C = 0x98B, + FMUL_C = 0x98D, + MUFU = 0xA10, + FADD_R = 0xB8B, + FMUL_R = 0xB8D, + LD_A = 0x1DFB, + ST_A = 0x1DFE, + + FSETP_R = 0x5BB, + FSETP_C = 0x4BB, + EXIT = 0xE30, + KIL = 0xE33, + + FMUL_IMM = 0x70D, + FMUL_IMM_x = 0x72D, + FADD_IMM = 0x70B, + FADD_IMM_x = 0x72B, + }; + + enum class Type { + Trivial, + Arithmetic, + Ffma, + Flow, + Memory, + Unknown, + }; + + struct Info { + Type type; + std::string name; + }; + + OpCode() = default; + + constexpr OpCode(Id value) : value(static_cast<u64>(value)) {} + + constexpr OpCode(u64 value) : value{value} {} + + constexpr Id EffectiveOpCode() const { + switch (op1) { + case Id::TEXS: + return op1; + } + + switch (op2) { + case Id::IPA: + return op2; + } + + switch (op3) { + case Id::FFMA_IMM: + case Id::FFMA_CR: + case Id::FFMA_RC: + case Id::FFMA_RR: + return op3; + } + + switch (op4) { + case Id::EXIT: + case Id::FSETP_R: + case Id::FSETP_C: + case Id::KIL: + return op4; + } + + switch (op5) { + case Id::MUFU: + case Id::LD_A: + case Id::ST_A: + case Id::FADD_R: + case Id::FADD_C: + case Id::FMUL_R: + case Id::FMUL_C: + return op5; + + case Id::FMUL_IMM: + case Id::FMUL_IMM_x: + return Id::FMUL_IMM; + + case Id::FADD_IMM: + case Id::FADD_IMM_x: + return Id::FADD_IMM; + } + + return static_cast<Id>(value); + } + + static const Info& GetInfo(const OpCode& opcode) { + static const std::map<Id, Info> info_table{BuildInfoTable()}; + const auto& search{info_table.find(opcode.EffectiveOpCode())}; + if (search != info_table.end()) { + return search->second; + } + + static const Info unknown{Type::Unknown, "UNK"}; + return unknown; + } + + constexpr operator Id() const { + return static_cast<Id>(value); + } + + constexpr OpCode operator<<(size_t bits) const { + return value << bits; + } + + constexpr OpCode operator>>(size_t bits) const { + return value >> bits; + } + + template <typename T> + constexpr u64 operator-(const T& oth) const { + return value - oth; + } + + constexpr u64 operator&(const OpCode& oth) const { + return value & oth.value; + } + + constexpr u64 operator~() const { + return ~value; + } + + static std::map<Id, Info> BuildInfoTable() { + std::map<Id, Info> info_table; + info_table[Id::TEXS] = {Type::Memory, "texs"}; + info_table[Id::LD_A] = {Type::Memory, "ld_a"}; + info_table[Id::ST_A] = {Type::Memory, "st_a"}; + info_table[Id::MUFU] = {Type::Arithmetic, "mufu"}; + info_table[Id::FFMA_IMM] = {Type::Ffma, "ffma_imm"}; + info_table[Id::FFMA_CR] = {Type::Ffma, "ffma_cr"}; + info_table[Id::FFMA_RC] = {Type::Ffma, "ffma_rc"}; + info_table[Id::FFMA_RR] = {Type::Ffma, "ffma_rr"}; + info_table[Id::FADD_R] = {Type::Arithmetic, "fadd_r"}; + info_table[Id::FADD_C] = {Type::Arithmetic, "fadd_c"}; + info_table[Id::FADD_IMM] = {Type::Arithmetic, "fadd_imm"}; + info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"}; + info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"}; + info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"}; + info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"}; + info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"}; + info_table[Id::EXIT] = {Type::Trivial, "exit"}; + info_table[Id::IPA] = {Type::Trivial, "ipa"}; + info_table[Id::KIL] = {Type::Flow, "kil"}; + return info_table; + } + + BitField<57, 7, Id> op1; + BitField<56, 8, Id> op2; + BitField<55, 9, Id> op3; + BitField<52, 12, Id> op4; + BitField<51, 13, Id> op5; + u64 value; +}; +static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size"); + +} // namespace Shader +} // namespace Tegra + +namespace std { + +// TODO(bunne): The below is forbidden by the C++ standard, but works fine. See #330. +template <> +struct make_unsigned<Tegra::Shader::Attribute> { + using type = Tegra::Shader::Attribute; +}; + +template <> +struct make_unsigned<Tegra::Shader::Register> { + using type = Tegra::Shader::Register; +}; + +template <> +struct make_unsigned<Tegra::Shader::OpCode> { + using type = Tegra::Shader::OpCode; +}; + +} // namespace std + +namespace Tegra { +namespace Shader { + +enum class Pred : u64 { + UnusedIndex = 0x7, + NeverExecute = 0xf, +}; + +enum class SubOp : u64 { + Cos = 0x0, + Sin = 0x1, + Ex2 = 0x2, + Lg2 = 0x3, + Rcp = 0x4, + Rsq = 0x5, +}; + +union Instruction { + Instruction& operator=(const Instruction& instr) { + hex = instr.hex; + return *this; + } + + OpCode opcode; + BitField<0, 8, Register> gpr0; + BitField<8, 8, Register> gpr8; + BitField<16, 4, Pred> pred; + BitField<20, 8, Register> gpr20; + BitField<20, 7, SubOp> sub_op; + BitField<28, 8, Register> gpr28; + BitField<36, 13, u64> imm36; + BitField<39, 8, Register> gpr39; + + union { + BitField<45, 1, u64> negate_b; + BitField<46, 1, u64> abs_a; + BitField<48, 1, u64> negate_a; + BitField<49, 1, u64> abs_b; + BitField<50, 1, u64> abs_d; + } alu; + + union { + BitField<48, 1, u64> negate_b; + BitField<49, 1, u64> negate_c; + } ffma; + + BitField<60, 1, u64> is_b_gpr; + BitField<59, 1, u64> is_c_gpr; + + Attribute attribute; + Uniform uniform; + + u64 hex; +}; +static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); +static_assert(std::is_standard_layout<Instruction>::value, + "Structure does not have standard layout"); + +} // namespace Shader +} // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f217a265b..f75d4c658 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -34,33 +34,7 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -enum class UniformBindings : GLuint { Common, VS, FS }; - -static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, - size_t expected_size) { - GLuint ub_index = glGetUniformBlockIndex(shader, name); - if (ub_index != GL_INVALID_INDEX) { - GLint ub_size = 0; - glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); - ASSERT_MSG(ub_size == expected_size, - "Uniform block size did not match! Got %d, expected %zu", - static_cast<int>(ub_size), expected_size); - glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); - } -} - -static void SetShaderUniformBlockBindings(GLuint shader) { - SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, - sizeof(RasterizerOpenGL::UniformData)); - SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, - sizeof(RasterizerOpenGL::VSUniformData)); - SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, - sizeof(RasterizerOpenGL::FSUniformData)); -} - RasterizerOpenGL::RasterizerOpenGL() { - shader_dirty = true; - has_ARB_buffer_storage = false; has_ARB_direct_state_access = false; has_ARB_separate_shader_objects = false; @@ -88,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() { } } + ASSERT_MSG(has_ARB_separate_shader_objects, "has_ARB_separate_shader_objects is unsupported"); + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; @@ -102,36 +78,31 @@ RasterizerOpenGL::RasterizerOpenGL() { state.draw.uniform_buffer = uniform_buffer.handle; state.Apply(); - glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniform_buffer.handle); - - uniform_block_data.dirty = true; - // Create render framebuffer framebuffer.Create(); - if (has_ARB_separate_shader_objects) { - hw_vao.Create(); - hw_vao_enabled_attributes.fill(false); + hw_vao.Create(); + hw_vao_enabled_attributes.fill(false); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); + stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); + state.draw.vertex_buffer = stream_buffer->GetHandle(); - pipeline.Create(); - state.draw.program_pipeline = pipeline.handle; - state.draw.shader_program = 0; - state.draw.vertex_array = hw_vao.handle; - state.Apply(); + shader_program_manager = std::make_unique<GLShader::ProgramManager>(); + + state.draw.shader_program = 0; + state.draw.vertex_array = hw_vao.handle; + state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - vs_uniform_buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); - } else { - UNREACHABLE(); + for (unsigned index = 0; index < uniform_buffers.size(); ++index) { + auto& buffer = uniform_buffers[index]; + buffer.Create(); + glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, + GL_STREAM_COPY); + glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); } accelerate_draw = AccelDraw::Disabled; @@ -200,26 +171,74 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { buffer_offset += data_size; } -void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_VS); - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); -} +void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { + // Helper function for uploading uniform data + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; -void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { - MICROPROFILE_SCOPE(OpenGL_FS); - UNREACHABLE(); -} + auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); + ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); -bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { - if (!has_ARB_separate_shader_objects) { - UNREACHABLE(); - return false; + for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { + ptr_pos += sizeof(GLShader::MaxwellUniformData); + + auto& shader_config = gpu.regs.shader_config[index]; + const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; + + // VertexB program is always enabled, despite bit setting + const bool is_enabled{shader_config.enable || program == Maxwell::ShaderProgram::VertexB}; + + // Skip stages that are not enabled + if (!is_enabled) { + continue; + } + + // Upload uniform data as one UBO per stage + const auto& stage = index - 1; // Stage indices are 0 - 5 + const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + copy_buffer(uniform_buffers[stage].handle, ubo_offset, + sizeof(GLShader::MaxwellUniformData)); + GLShader::MaxwellUniformData* ub_ptr = + reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); + ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); + + // Fetch program code from memory + GLShader::ProgramCode program_code; + const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; + const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)}; + Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64)); + GLShader::ShaderSetup setup{std::move(program_code)}; + + switch (program) { + case Maxwell::ShaderProgram::VertexB: { + GLShader::MaxwellVSConfig vs_config{setup}; + shader_program_manager->UseProgrammableVertexShader(vs_config, setup); + break; + } + case Maxwell::ShaderProgram::Fragment: { + GLShader::MaxwellFSConfig fs_config{setup}; + shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unimplemented shader index=%d, enable=%d, offset=0x%08X", index, + shader_config.enable.Value(), shader_config.offset); + UNREACHABLE(); + } } + shader_program_manager->UseTrivialGeometryShader(); +} + +bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; DrawArrays(); - return true; } @@ -280,18 +299,6 @@ void RasterizerOpenGL::DrawArrays() { // Sync and bind the texture surfaces BindTextures(); - // Sync and bind the shader - if (shader_dirty) { - SetShader(); - shader_dirty = false; - } - - // Sync the uniform data - if (uniform_block_data.dirty) { - glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); - uniform_block_data.dirty = false; - } - // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable // scissor test to prevent drawing outside of the framebuffer region state.scissor.enabled = true; @@ -311,7 +318,9 @@ void RasterizerOpenGL::DrawArrays() { if (is_indexed) { UNREACHABLE(); } - buffer_size += sizeof(VSUniformData); + + // Uniform space for the 5 shader stages + buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; size_t ptr_pos = 0; u8* buffer_ptr; @@ -327,25 +336,12 @@ void RasterizerOpenGL::DrawArrays() { UNREACHABLE(); } - SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), - buffer_offset + static_cast<GLintptr>(ptr_pos)); - const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); - ptr_pos += sizeof(VSUniformData); + SetupShaders(buffer_ptr, buffer_offset, ptr_pos); stream_buffer->Unmap(); - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; - - copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); - - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + shader_program_manager->ApplyTo(state); + state.Apply(); if (is_indexed) { UNREACHABLE(); @@ -531,72 +527,6 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -void RasterizerOpenGL::SetShader() { - // TODO(bunnei): The below sets up a static test shader for passing untransformed vertices to - // OpenGL for rendering. This should be removed/replaced when we start emulating Maxwell - // shaders. - - static constexpr char vertex_shader[] = R"( -#version 150 core - -in vec2 vert_position; -in vec2 vert_tex_coord; -out vec2 frag_tex_coord; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)) * vert_position + mat3x2(0.0015625f, 0.0, 0.0, -0.0027778, -1.0, 1.0)[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - - static constexpr char fragment_shader[] = R"( -#version 150 core - -in vec2 frag_tex_coord; -out vec4 color; - -uniform sampler2D tex[32]; - -void main() { - color = texture(tex[0], frag_tex_coord); -} -)"; - - if (current_shader) { - return; - } - - LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); - - current_shader = &test_shader; - if (has_ARB_separate_shader_objects) { - test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); - glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); - } else { - UNREACHABLE(); - } - - state.draw.shader_program = test_shader.shader.handle; - state.Apply(); - - for (u32 texture = 0; texture < texture_samplers.size(); ++texture) { - // Set the texture samplers to correspond to different texture units - std::string uniform_name = "tex[" + std::to_string(texture) + "]"; - GLint uniform_tex = glGetUniformLocation(test_shader.shader.handle, uniform_name.c_str()); - if (uniform_tex != -1) { - glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); - } - } - - if (has_ARB_separate_shader_objects) { - state.draw.shader_program = 0; - state.Apply(); - } -} - void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, bool has_stencil) { state.draw.draw_framebuffer = framebuffer.handle; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d868bf421..71c21c69b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -15,10 +15,12 @@ #include "common/common_types.h" #include "common/hash.h" #include "common/vector_math.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -45,7 +47,7 @@ public: /// OpenGL shader generated for a given Maxwell register state struct MaxwellShader { /// OpenGL shader resource - OGLShader shader; + OGLProgram shader; }; struct VertexShader { @@ -56,34 +58,6 @@ public: OGLShader shader; }; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned - // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at - // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. - // Not following that rule will cause problems on some AMD drivers. - struct UniformData {}; - - // static_assert( - // sizeof(UniformData) == 0x460, - // "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16384, - "UniformData structure must be less than 16kb as per the OpenGL spec"); - - struct VSUniformData {}; - // static_assert( - // sizeof(VSUniformData) == 1856, - // "The size of the VSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(VSUniformData) < 16384, - "VSUniformData structure must be less than 16kb as per the OpenGL spec"); - - struct FSUniformData {}; - // static_assert( - // sizeof(FSUniformData) == 1856, - // "The size of the FSUniformData structure has changed, update the structure in the - // shader"); - static_assert(sizeof(FSUniformData) < 16384, - "FSUniformData structure must be less than 16kb as per the OpenGL spec"); - private: class SamplerInfo { public: @@ -122,9 +96,6 @@ private: /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); - /// Sets the OpenGL shader in accordance with the current guest state - void SetShader(); - /// Syncs the cull mode to match the guest state void SyncCullMode(); @@ -152,23 +123,12 @@ private: RasterizerCacheOpenGL res_cache; - /// Shader used for test renderering - to be removed once we have emulated shaders - MaxwellShader test_shader{}; - - const MaxwellShader* current_shader{}; - bool shader_dirty{}; - - struct { - UniformData data; - bool dirty; - } uniform_block_data = {}; - - OGLPipeline pipeline; + std::unique_ptr<GLShader::ProgramManager> shader_program_manager; OGLVertexArray sw_vao; OGLVertexArray hw_vao; std::array<bool, 16> hw_vao_enabled_attributes; - std::array<SamplerInfo, 32> texture_samplers; + std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; std::unique_ptr<OGLStreamBuffer> vertex_buffer; OGLBuffer uniform_buffer; @@ -182,19 +142,9 @@ private: void AnalyzeVertexArray(bool is_indexed); void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); - OGLBuffer vs_uniform_buffer; - std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map; - std::unordered_map<std::string, VertexShader> vs_shader_cache; - OGLShader vs_default_shader; - - void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); - - OGLBuffer fs_uniform_buffer; - std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map; - std::unordered_map<std::string, FragmentShader> fs_shader_cache; - OGLShader fs_default_shader; + std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; - void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); + void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5cbafa2e7..213b20a21 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -818,7 +818,7 @@ void main() { color = texelFetch(tbo, tbo_offset).rabg; } )"; - d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); + d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); OpenGLState state = OpenGLState::GetCurState(); GLuint old_program = state.draw.shader_program; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 06524fc59..e7ce506cf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -334,7 +334,7 @@ private: OGLVertexArray attributeless_vao; OGLBuffer d24s8_abgr_buffer; GLsizeiptr d24s8_abgr_buffer_size; - OGLShader d24s8_abgr_shader; + OGLProgram d24s8_abgr_shader; GLint d24s8_abgr_tbo_size_u_id; GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 7da5e74d1..2f0e7ac1a 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -13,14 +13,16 @@ class OGLTexture : private NonCopyable { public: OGLTexture() = default; - OGLTexture(OGLTexture&& o) { - std::swap(handle, o.handle); - } + + OGLTexture(OGLTexture&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLTexture() { Release(); } + OGLTexture& operator=(OGLTexture&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -46,14 +48,16 @@ public: class OGLSampler : private NonCopyable { public: OGLSampler() = default; - OGLSampler(OGLSampler&& o) { - std::swap(handle, o.handle); - } + + OGLSampler(OGLSampler&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLSampler() { Release(); } + OGLSampler& operator=(OGLSampler&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -79,25 +83,71 @@ public: class OGLShader : private NonCopyable { public: OGLShader() = default; - OGLShader(OGLShader&& o) { - std::swap(handle, o.handle); - } + + OGLShader(OGLShader&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLShader() { Release(); } + OGLShader& operator=(OGLShader&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } - /// Creates a new internal OpenGL resource and stores the handle - void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, - const std::vector<const char*>& feedback_vars = {}, - bool separable_program = false) { + void Create(const char* source, GLenum type) { + if (handle != 0) + return; + if (source == nullptr) + return; + handle = GLShader::LoadShader(source, type); + } + + void Release() { + if (handle == 0) + return; + glDeleteShader(handle); + handle = 0; + } + + GLuint handle = 0; +}; + +class OGLProgram : private NonCopyable { +public: + OGLProgram() = default; + + OGLProgram(OGLProgram&& o) : handle(std::exchange(o.handle, 0)) {} + + ~OGLProgram() { + Release(); + } + + OGLProgram& operator=(OGLProgram&& o) { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + template <typename... T> + void Create(bool separable_program, T... shaders) { if (handle != 0) return; - handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, - separable_program); + handle = GLShader::LoadProgram(separable_program, shaders...); + } + + /// Creates a new internal OpenGL resource and stores the handle + void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, + bool separable_program = false) { + OGLShader vert, geo, frag; + if (vert_shader) + vert.Create(vert_shader, GL_VERTEX_SHADER); + if (geo_shader) + geo.Create(geo_shader, GL_GEOMETRY_SHADER); + if (frag_shader) + frag.Create(frag_shader, GL_FRAGMENT_SHADER); + Create(separable_program, vert.handle, geo.handle, frag.handle); } /// Deletes the internal OpenGL resource @@ -148,14 +198,16 @@ public: class OGLBuffer : private NonCopyable { public: OGLBuffer() = default; - OGLBuffer(OGLBuffer&& o) { - std::swap(handle, o.handle); - } + + OGLBuffer(OGLBuffer&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLBuffer() { Release(); } + OGLBuffer& operator=(OGLBuffer&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -214,14 +266,16 @@ public: class OGLVertexArray : private NonCopyable { public: OGLVertexArray() = default; - OGLVertexArray(OGLVertexArray&& o) { - std::swap(handle, o.handle); - } + + OGLVertexArray(OGLVertexArray&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLVertexArray() { Release(); } + OGLVertexArray& operator=(OGLVertexArray&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } @@ -247,14 +301,16 @@ public: class OGLFramebuffer : private NonCopyable { public: OGLFramebuffer() = default; - OGLFramebuffer(OGLFramebuffer&& o) { - std::swap(handle, o.handle); - } + + OGLFramebuffer(OGLFramebuffer&& o) : handle(std::exchange(o.handle, 0)) {} + ~OGLFramebuffer() { Release(); } + OGLFramebuffer& operator=(OGLFramebuffer&& o) { - std::swap(handle, o.handle); + Release(); + handle = std::exchange(o.handle, 0); return *this; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 564ea8f9e..1290fa4cd 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2,57 +2,499 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <map> +#include <set> #include <string> -#include <queue> #include "common/assert.h" #include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" -namespace Maxwell3D { -namespace Shader { +namespace GLShader { namespace Decompiler { +using Tegra::Shader::Attribute; +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Register; +using Tegra::Shader::SubOp; +using Tegra::Shader::Uniform; + constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; -class Impl { +class DecompileFail : public std::runtime_error { public: - Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul, - const std::string& emit_cb, const std::string& setemit_cb) - : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), - inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), - sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} - - std::string Decompile() { - UNREACHABLE(); - return {}; + using std::runtime_error::runtime_error; +}; + +/// Describes the behaviour of code path of a given entry point and a return point. +enum class ExitMethod { + Undetermined, ///< Internal value. Only occur when analyzing JMP loop. + AlwaysReturn, ///< All code paths reach the return point. + Conditional, ///< Code path reaches the return point or an END instruction conditionally. + AlwaysEnd, ///< All code paths reach a END instruction. +}; + +/// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction. +struct Subroutine { + /// Generates a name suitable for GLSL source code. + std::string GetName() const { + return "sub_" + std::to_string(begin) + "_" + std::to_string(end); + } + + u32 begin; ///< Entry point of the subroutine. + u32 end; ///< Return point of the subroutine. + ExitMethod exit_method; ///< Exit method of the subroutine. + std::set<u32> labels; ///< Addresses refereced by JMP instructions. + + bool operator<(const Subroutine& rhs) const { + return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); + } +}; + +/// Analyzes shader code and produces a set of subroutines. +class ControlFlowAnalyzer { +public: + ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) + : program_code(program_code) { + + // Recursively finds all subroutines. + const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); + if (program_main.exit_method != ExitMethod::AlwaysEnd) + throw DecompileFail("Program does not always end"); + } + + std::set<Subroutine> GetSubroutines() { + return std::move(subroutines); } private: - const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; - u32 main_offset; - const std::function<std::string(u32)>& inputreg_getter; - const std::function<std::string(u32)>& outputreg_getter; - bool sanitize_mul; - const std::string& emit_cb; - const std::string& setemit_cb; + const ProgramCode& program_code; + std::set<Subroutine> subroutines; + std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; + + /// Adds and analyzes a new subroutine if it is not added yet. + const Subroutine& AddSubroutine(u32 begin, u32 end) { + auto iter = subroutines.find(Subroutine{begin, end}); + if (iter != subroutines.end()) + return *iter; + + Subroutine subroutine{begin, end}; + subroutine.exit_method = Scan(begin, end, subroutine.labels); + if (subroutine.exit_method == ExitMethod::Undetermined) + throw DecompileFail("Recursive function detected"); + return *subroutines.insert(std::move(subroutine)).first; + } + + /// Scans a range of code for labels and determines the exit method. + ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { + auto [iter, inserted] = + exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); + ExitMethod& exit_method = iter->second; + if (!inserted) + return exit_method; + + for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { + const Instruction instr = {program_code[offset]}; + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::EXIT: { + return exit_method = ExitMethod::AlwaysEnd; + } + } + } + return exit_method = ExitMethod::AlwaysReturn; + } }; -std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, - u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb, - const std::string& setemit_cb) { - Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, - sanitize_mul, emit_cb, setemit_cb); - return impl.Decompile(); +class ShaderWriter { +public: + void AddLine(const std::string& text) { + DEBUG_ASSERT(scope >= 0); + if (!text.empty()) { + shader_source += std::string(static_cast<size_t>(scope) * 4, ' '); + } + shader_source += text + '\n'; + } + + std::string GetResult() { + return std::move(shader_source); + } + + int scope = 0; + +private: + std::string shader_source; +}; + +class GLSLGenerator { +public: + GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, + u32 main_offset, Maxwell3D::Regs::ShaderStage stage) + : subroutines(subroutines), program_code(program_code), main_offset(main_offset), + stage(stage) { + + Generate(); + } + + std::string GetShaderCode() { + return declarations.GetResult() + shader.GetResult(); + } + +private: + /// Gets the Subroutine object corresponding to the specified address. + const Subroutine& GetSubroutine(u32 begin, u32 end) const { + auto iter = subroutines.find(Subroutine{begin, end}); + ASSERT(iter != subroutines.end()); + return *iter; + } + + /// Generates code representing an input attribute register. + std::string GetInputAttribute(Attribute::Index attribute) { + declr_input_attribute.insert(attribute); + + const u32 index{static_cast<u32>(attribute) - + static_cast<u32>(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + return "input_attribute_" + std::to_string(index); + } + + LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index); + UNREACHABLE(); + } + + /// Generates code representing an output attribute register. + std::string GetOutputAttribute(Attribute::Index attribute) { + switch (attribute) { + case Attribute::Index::Position: + return "gl_Position"; + default: + const u32 index{static_cast<u32>(attribute) - + static_cast<u32>(Attribute::Index::Attribute_0)}; + if (attribute >= Attribute::Index::Attribute_0) { + declr_output_attribute.insert(attribute); + return "output_attribute_" + std::to_string(index); + } + + LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index); + UNREACHABLE(); + } + } + + /// Generates code representing a temporary (GPR) register. + std::string GetRegister(const Register& reg) { + return *declr_register.insert("register_" + std::to_string(reg)).first; + } + + /// Generates code representing a uniform (C buffer) register. + std::string GetUniform(const Uniform& reg) const { + std::string index = std::to_string(reg.index); + return "uniform_" + index + "[" + std::to_string(reg.offset >> 2) + "][" + + std::to_string(reg.offset & 3) + "]"; + } + + /** + * Adds code that calls a subroutine. + * @param subroutine the subroutine to call. + */ + void CallSubroutine(const Subroutine& subroutine) { + if (subroutine.exit_method == ExitMethod::AlwaysEnd) { + shader.AddLine(subroutine.GetName() + "();"); + shader.AddLine("return true;"); + } else if (subroutine.exit_method == ExitMethod::Conditional) { + shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); + } else { + shader.AddLine(subroutine.GetName() + "();"); + } + } + + /** + * Writes code that does an assignment operation. + * @param reg the destination register code. + * @param value the code representing the value to assign. + */ + void SetDest(u64 elem, const std::string& reg, const std::string& value, + u64 dest_num_components, u64 value_num_components) { + std::string swizzle = "."; + swizzle += "xyzw"[elem]; + + std::string dest = reg + (dest_num_components != 1 ? swizzle : ""); + std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : ""); + + shader.AddLine(dest + " = " + src + ";"); + } + + /** + * Compiles a single instruction from Tegra to GLSL. + * @param offset the offset of the Tegra shader instruction. + * @return the offset of the next instruction to execute. Usually it is the current offset + * + 1. If the current instruction always terminates the program, returns PROGRAM_END. + */ + u32 CompileInstr(u32 offset) { + const Instruction instr = {program_code[offset]}; + + shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name); + + switch (OpCode::GetInfo(instr.opcode).type) { + case OpCode::Type::Arithmetic: { + ASSERT(!instr.alu.abs_d); + + std::string dest = GetRegister(instr.gpr0); + std::string op_a = instr.alu.negate_a ? "-" : ""; + op_a += GetRegister(instr.gpr8); + if (instr.alu.abs_a) { + op_a = "abs(" + op_a + ")"; + } + + std::string op_b = instr.alu.negate_b ? "-" : ""; + if (instr.is_b_gpr) { + op_b += GetRegister(instr.gpr20); + } else { + op_b += GetUniform(instr.uniform); + } + if (instr.alu.abs_b) { + op_b = "abs(" + op_b + ")"; + } + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::FMUL_C: + case OpCode::Id::FMUL_R: { + SetDest(0, dest, op_a + " * " + op_b, 1, 1); + break; + } + case OpCode::Id::FADD_C: + case OpCode::Id::FADD_R: { + SetDest(0, dest, op_a + " + " + op_b, 1, 1); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + case OpCode::Type::Ffma: { + ASSERT_MSG(!instr.ffma.negate_b, "untested"); + ASSERT_MSG(!instr.ffma.negate_c, "untested"); + + std::string dest = GetRegister(instr.gpr0); + std::string op_a = GetRegister(instr.gpr8); + + std::string op_b = instr.ffma.negate_b ? "-" : ""; + op_b += GetUniform(instr.uniform); + + std::string op_c = instr.ffma.negate_c ? "-" : ""; + op_c += GetRegister(instr.gpr39); + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::FFMA_CR: { + SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1); + break; + } + + default: { + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + case OpCode::Type::Memory: { + std::string gpr0 = GetRegister(instr.gpr0); + const Attribute::Index attribute = instr.attribute.fmt20.index; + + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::LD_A: { + ASSERT(instr.attribute.fmt20.size == 0); + SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4); + break; + } + case OpCode::Id::ST_A: { + ASSERT(instr.attribute.fmt20.size == 0); + SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + break; + } + + default: { + switch (instr.opcode.EffectiveOpCode()) { + case OpCode::Id::EXIT: { + shader.AddLine("return true;"); + offset = PROGRAM_END - 1; + break; + } + + default: { + LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", + static_cast<unsigned>(instr.opcode.EffectiveOpCode()), + OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex); + throw DecompileFail("Unhandled instruction"); + break; + } + } + + break; + } + } + + return offset + 1; + } + + /** + * Compiles a range of instructions from Tegra to GLSL. + * @param begin the offset of the starting instruction. + * @param end the offset where the compilation should stop (exclusive). + * @return the offset of the next instruction to compile. PROGRAM_END if the program + * terminates. + */ + u32 CompileRange(u32 begin, u32 end) { + u32 program_counter; + for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { + program_counter = CompileInstr(program_counter); + } + return program_counter; + } + + void Generate() { + // Add declarations for all subroutines + for (const auto& subroutine : subroutines) { + shader.AddLine("bool " + subroutine.GetName() + "();"); + } + shader.AddLine(""); + + // Add the main entry point + shader.AddLine("bool exec_shader() {"); + ++shader.scope; + CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); + --shader.scope; + shader.AddLine("}\n"); + + // Add definitions for all subroutines + for (const auto& subroutine : subroutines) { + std::set<u32> labels = subroutine.labels; + + shader.AddLine("bool " + subroutine.GetName() + "() {"); + ++shader.scope; + + if (labels.empty()) { + if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { + shader.AddLine("return false;"); + } + } else { + labels.insert(subroutine.begin); + shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + shader.AddLine("while (true) {"); + ++shader.scope; + + shader.AddLine("switch (jmp_to) {"); + + for (auto label : labels) { + shader.AddLine("case " + std::to_string(label) + "u: {"); + ++shader.scope; + + auto next_it = labels.lower_bound(label + 1); + u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; + + u32 compile_end = CompileRange(label, next_label); + if (compile_end > next_label && compile_end != PROGRAM_END) { + // This happens only when there is a label inside a IF/LOOP block + shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + labels.emplace(compile_end); + } + + --shader.scope; + shader.AddLine("}"); + } + + shader.AddLine("default: return false;"); + shader.AddLine("}"); + + --shader.scope; + shader.AddLine("}"); + + shader.AddLine("return false;"); + } + + --shader.scope; + shader.AddLine("}\n"); + + DEBUG_ASSERT(shader.scope == 0); + } + + GenerateDeclarations(); + } + + /// Add declarations for registers + void GenerateDeclarations() { + for (const auto& reg : declr_register) { + declarations.AddLine("float " + reg + " = 0.0;"); + } + declarations.AddLine(""); + + for (const auto& index : declr_input_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine("layout(location = " + + std::to_string(static_cast<u32>(index) - + static_cast<u32>(Attribute::Index::Attribute_0)) + + ") in vec4 " + GetInputAttribute(index) + ";"); + } + declarations.AddLine(""); + + for (const auto& index : declr_output_attribute) { + // TODO(bunnei): Use proper number of elements for these + declarations.AddLine("layout(location = " + + std::to_string(static_cast<u32>(index) - + static_cast<u32>(Attribute::Index::Attribute_0)) + + ") out vec4 " + GetOutputAttribute(index) + ";"); + } + declarations.AddLine(""); + } + +private: + const std::set<Subroutine>& subroutines; + const ProgramCode& program_code; + const u32 main_offset; + Maxwell3D::Regs::ShaderStage stage; + + ShaderWriter shader; + ShaderWriter declarations; + + // Declarations + std::set<std::string> declr_register; + std::set<Attribute::Index> declr_input_attribute; + std::set<Attribute::Index> declr_output_attribute; +}; // namespace Decompiler + +std::string GetCommonDeclarations() { + return "bool exec_shader();"; +} + +boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage) { + try { + auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); + GLSLGenerator generator(subroutines, program_code, main_offset, stage); + return generator.GetShaderCode(); + } catch (const DecompileFail& exception) { + LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what()); + } + return boost::none; } } // namespace Decompiler -} // namespace Shader -} // namespace Maxwell3D +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02ebfcbe8..2f4047d87 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -5,23 +5,20 @@ #include <array> #include <functional> #include <string> +#include <boost/optional.hpp> #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" -namespace Maxwell3D { -namespace Shader { +namespace GLShader { namespace Decompiler { -constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; -constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; +using Tegra::Engines::Maxwell3D; -std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, - const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, - u32 main_offset, - const std::function<std::string(u32)>& inputreg_getter, - const std::function<std::string(u32)>& outputreg_getter, - bool sanitize_mul, const std::string& emit_cb = "", - const std::string& setemit_cb = ""); +std::string GetCommonDeclarations(); + +boost::optional<std::string> DecompileProgram(const ProgramCode& program_code, u32 main_offset, + Maxwell3D::Regs::ShaderStage stage); } // namespace Decompiler -} // namespace Shader -} // namespace Maxwell3D +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8f3c98800..524c2cfb5 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -7,12 +7,12 @@ namespace GLShader { -std::string GenerateVertexShader(const MaxwellVSConfig& config) { +std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { UNREACHABLE(); return {}; } -std::string GenerateFragmentShader(const MaxwellFSConfig& config) { +std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 5101e7d30..925e66ee4 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,46 +4,67 @@ #pragma once -#include <cstring> +#include <array> #include <string> #include <type_traits> +#include "common/common_types.h" #include "common/hash.h" namespace GLShader { -enum Attributes { - ATTRIBUTE_POSITION, - ATTRIBUTE_COLOR, - ATTRIBUTE_TEXCOORD0, - ATTRIBUTE_TEXCOORD1, - ATTRIBUTE_TEXCOORD2, - ATTRIBUTE_TEXCOORD0_W, - ATTRIBUTE_NORMQUAT, - ATTRIBUTE_VIEW, -}; +constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; -struct MaxwellShaderConfigCommon { - explicit MaxwellShaderConfigCommon(){}; +using ProgramCode = std::array<u64, MAX_PROGRAM_CODE_LENGTH>; + +struct ShaderSetup { + ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} + + ProgramCode program_code; + bool program_code_hash_dirty = true; + + u64 GetProgramCodeHash() { + if (program_code_hash_dirty) { + program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + program_code_hash_dirty = false; + } + return program_code_hash; + } + +private: + u64 program_code_hash{}; }; -struct MaxwellVSConfig : MaxwellShaderConfigCommon { - explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} +struct MaxwellShaderConfigCommon { + void Init(ShaderSetup& setup) { + program_hash = setup.GetProgramCodeHash(); + } - bool operator==(const MaxwellVSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0; - }; + u64 program_hash; }; -struct MaxwellFSConfig : MaxwellShaderConfigCommon { - explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} +struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { + explicit MaxwellVSConfig(ShaderSetup& setup) { + state.Init(setup); + } +}; - bool operator==(const MaxwellFSConfig& o) const { - return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; - }; +struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { + explicit MaxwellFSConfig(ShaderSetup& setup) { + state.Init(setup); + } }; -std::string GenerateVertexShader(const MaxwellVSConfig& config); -std::string GenerateFragmentShader(const MaxwellFSConfig& config); +/** + * Generates the GLSL vertex shader program source code for the given VS program + * @returns String of the shader source code + */ +std::string GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); + +/** + * Generates the GLSL fragment shader program source code for the given FS program + * @returns String of the shader source code + */ +std::string GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); } // namespace GLShader @@ -52,14 +73,14 @@ namespace std { template <> struct hash<GLShader::MaxwellVSConfig> { size_t operator()(const GLShader::MaxwellVSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); + return k.Hash(); } }; template <> struct hash<GLShader::MaxwellFSConfig> { size_t operator()(const GLShader::MaxwellFSConfig& k) const { - return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); + return k.Hash(); } }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp new file mode 100644 index 000000000..7fceedce8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -0,0 +1,65 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "core/core.h" +#include "core/hle/kernel/process.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" + +namespace GLShader { + +namespace Impl { +void SetShaderUniformBlockBinding(GLuint shader, const char* name, + Maxwell3D::Regs::ShaderStage binding, size_t expected_size) { + GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index != GL_INVALID_INDEX) { + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, + "Uniform block size did not match! Got %d, expected %zu", + static_cast<int>(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); + } +} + +void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, + sizeof(MaxwellUniformData)); + SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, + sizeof(MaxwellUniformData)); +} + +void SetShaderSamplerBindings(GLuint shader) { + OpenGLState cur_state = OpenGLState::GetCurState(); + GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); + cur_state.Apply(); + + // Set the texture samplers to correspond to different texture units + for (u32 texture = 0; texture < NumTextureSamplers; ++texture) { + // Set the texture samplers to correspond to different texture units + std::string uniform_name = "tex[" + std::to_string(texture) + "]"; + GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str()); + if (uniform_tex != -1) { + glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id); + } + } + + cur_state.draw.shader_program = old_program; + cur_state.Apply(); +} + +} // namespace Impl + +void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + for (unsigned index = 0; index < shader_stage.const_buffers.size(); ++index) { + const auto& const_buffer = shader_stage.const_buffers[index]; + const VAddr vaddr = memory_manager->PhysicalToVirtualAddress(const_buffer.address); + Memory::ReadBlock(vaddr, const_buffers[index].data(), sizeof(ConstBuffer)); + } +} + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h new file mode 100644 index 000000000..5c8560cf5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -0,0 +1,151 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <tuple> +#include <unordered_map> +#include <boost/functional/hash.hpp> +#include <glad/glad.h> +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" + +namespace GLShader { + +/// Number of OpenGL texture samplers that can be used in the fragment shader +static constexpr size_t NumTextureSamplers = 32; + +using Tegra::Engines::Maxwell3D; + +namespace Impl { +void SetShaderUniformBlockBindings(GLuint shader); +void SetShaderSamplerBindings(GLuint shader); +} // namespace Impl + +/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned +// NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at +// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +// Not following that rule will cause problems on some AMD drivers. +struct MaxwellUniformData { + void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); + + using ConstBuffer = std::array<GLvec4, 4>; + alignas(16) std::array<ConstBuffer, Maxwell3D::Regs::MaxConstBuffers> const_buffers; +}; +static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) < 16384, + "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); + +class OGLShaderStage { +public: + OGLShaderStage() = default; + + void Create(const char* source, GLenum type) { + OGLShader shader; + shader.Create(source, type); + program.Create(true, shader.handle); + Impl::SetShaderUniformBlockBindings(program.handle); + Impl::SetShaderSamplerBindings(program.handle); + } + GLuint GetHandle() const { + return program.handle; + } + +private: + OGLProgram program; +}; + +// TODO(wwylele): beautify this doc +// This is a shader cache designed for translating PICA shader to GLSL shader. +// The double cache is needed because diffent KeyConfigType, which includes a hash of the code +// region (including its leftover unused code) can generate the same GLSL code. +template <typename KeyConfigType, + std::string (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), GLenum ShaderType> +class ShaderCache { +public: + ShaderCache() = default; + + GLuint Get(const KeyConfigType& key, const ShaderSetup& setup) { + auto map_it = shader_map.find(key); + if (map_it == shader_map.end()) { + std::string program = CodeGenerator(setup, key); + + auto [iter, new_shader] = shader_cache.emplace(program, OGLShaderStage{}); + OGLShaderStage& cached_shader = iter->second; + if (new_shader) { + cached_shader.Create(program.c_str(), ShaderType); + } + shader_map[key] = &cached_shader; + return cached_shader.GetHandle(); + } else { + return map_it->second->GetHandle(); + } + } + +private: + std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; + std::unordered_map<std::string, OGLShaderStage> shader_cache; +}; + +using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>; + +using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>; + +class ProgramManager { +public: + ProgramManager() { + pipeline.Create(); + } + + void UseProgrammableVertexShader(const MaxwellVSConfig& config, const ShaderSetup setup) { + current.vs = vertex_shaders.Get(config, setup); + } + + void UseProgrammableFragmentShader(const MaxwellFSConfig& config, const ShaderSetup setup) { + current.fs = fragment_shaders.Get(config, setup); + } + + void UseTrivialGeometryShader() { + current.gs = 0; + } + + void ApplyTo(OpenGLState& state) { + // Workaround for AMD bug + glUseProgramStages(pipeline.handle, + GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, + 0); + + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); + glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); + state.draw.shader_program = 0; + state.draw.program_pipeline = pipeline.handle; + } + +private: + struct ShaderTuple { + GLuint vs = 0, gs = 0, fs = 0; + bool operator==(const ShaderTuple& rhs) const { + return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); + } + struct Hash { + std::size_t operator()(const ShaderTuple& tuple) const { + std::size_t hash = 0; + boost::hash_combine(hash, tuple.vs); + boost::hash_combine(hash, tuple.gs); + boost::hash_combine(hash, tuple.fs); + return hash; + } + }; + }; + ShaderTuple current; + VertexShaders vertex_shaders; + FragmentShaders fragment_shaders; + + std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache; + OGLPipeline pipeline; +}; + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a6c6204d5..8568fface 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -10,156 +10,41 @@ namespace GLShader { -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector<const char*>& feedback_vars, - bool separable_program) { - // Create the shaders - GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; - GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; - GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0; +GLuint LoadShader(const char* source, GLenum type) { + const char* debug_type; + switch (type) { + case GL_VERTEX_SHADER: + debug_type = "vertex"; + break; + case GL_GEOMETRY_SHADER: + debug_type = "geometry"; + break; + case GL_FRAGMENT_SHADER: + debug_type = "fragment"; + break; + default: + UNREACHABLE(); + } + GLuint shader_id = glCreateShader(type); + glShaderSource(shader_id, 1, &source, nullptr); + NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); + glCompileShader(shader_id); GLint result = GL_FALSE; - int info_log_length; - - if (vertex_shader) { - // Compile Vertex Shader - LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); - - glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); - glCompileShader(vertex_shader_id); - - // Check Vertex Shader - glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> vertex_shader_error(info_log_length); - glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", - &vertex_shader_error[0]); - } - } - } - - if (geometry_shader) { - // Compile Geometry Shader - LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); - - glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); - glCompileShader(geometry_shader_id); - - // Check Geometry Shader - glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> geometry_shader_error(info_log_length); - glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, - &geometry_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", - &geometry_shader_error[0]); - } - } - } - - if (fragment_shader) { - // Compile Fragment Shader - LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); - - glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); - glCompileShader(fragment_shader_id); - - // Check Fragment Shader - glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> fragment_shader_error(info_log_length); - glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, - &fragment_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); - } else { - LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); - } - } - } - - // Link the program - LOG_DEBUG(Render_OpenGL, "Linking program..."); - - GLuint program_id = glCreateProgram(); - if (vertex_shader) { - glAttachShader(program_id, vertex_shader_id); - } - if (geometry_shader) { - glAttachShader(program_id, geometry_shader_id); - } - if (fragment_shader) { - glAttachShader(program_id, fragment_shader_id); - } - - if (!feedback_vars.empty()) { - auto varyings = feedback_vars; - glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()), - &varyings[0], GL_INTERLEAVED_ATTRIBS); - } - - if (separable_program) { - glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); - } - - glLinkProgram(program_id); - - // Check the program - glGetProgramiv(program_id, GL_LINK_STATUS, &result); - glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + GLint info_log_length; + glGetShaderiv(shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(shader_id, GL_INFO_LOG_LENGTH, &info_log_length); if (info_log_length > 1) { - std::vector<char> program_error(info_log_length); - glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + std::string shader_error(info_log_length, ' '); + glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); + NGLOG_DEBUG(Render_OpenGL, "{}", shader_error); } else { - LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); } } - - // If the program linking failed at least one of the shaders was probably bad - if (result == GL_FALSE) { - if (vertex_shader) { - LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); - } - if (geometry_shader) { - LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); - } - if (fragment_shader) { - LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); - } - } - ASSERT_MSG(result == GL_TRUE, "Shader not linked"); - - if (vertex_shader) { - glDetachShader(program_id, vertex_shader_id); - glDeleteShader(vertex_shader_id); - } - if (geometry_shader) { - glDetachShader(program_id, geometry_shader_id); - glDeleteShader(geometry_shader_id); - } - if (fragment_shader) { - glDetachShader(program_id, fragment_shader_id); - glDeleteShader(fragment_shader_id); - } - - return program_id; + return shader_id; } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index fc7b5e080..a1fa9e814 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -6,18 +6,60 @@ #include <vector> #include <glad/glad.h> +#include "common/assert.h" +#include "common/logging/log.h" namespace GLShader { /** + * Utility function to create and compile an OpenGL GLSL shader + * @param source String of the GLSL shader program + * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) + */ +GLuint LoadShader(const char* source, GLenum type); + +/** * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) - * @param vertex_shader String of the GLSL vertex shader program - * @param geometry_shader String of the GLSL geometry shader program - * @param fragment_shader String of the GLSL fragment shader program - * @returns Handle of the newly created OpenGL shader object + * @param separable_program whether to create a separable program + * @param shaders ID of shaders to attach to the program + * @returns Handle of the newly created OpenGL program object */ -GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, - const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, - bool separable_program = false); +template <typename... T> +GLuint LoadProgram(bool separable_program, T... shaders) { + // Link the program + NGLOG_DEBUG(Render_OpenGL, "Linking program..."); + + GLuint program_id = glCreateProgram(); + + ((shaders == 0 ? (void)0 : glAttachShader(program_id, shaders)), ...); + + if (separable_program) { + glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); + } + + glLinkProgram(program_id); + + // Check the program + GLint result = GL_FALSE; + GLint info_log_length; + glGetProgramiv(program_id, GL_LINK_STATUS, &result); + glGetProgramiv(program_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (info_log_length > 1) { + std::string program_error(info_log_length, ' '); + glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); + if (result == GL_TRUE) { + NGLOG_DEBUG(Render_OpenGL, "{}", program_error); + } else { + NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); + } + } + + ASSERT_MSG(result == GL_TRUE, "Shader not linked"); + + ((shaders == 0 ? (void)0 : glDetachShader(program_id, shaders)), ...); + + return program_id; +} } // namespace GLShader diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 48ee80125..7909dcfc3 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -10,6 +10,14 @@ #include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" +using GLvec2 = std::array<GLfloat, 2>; +using GLvec3 = std::array<GLfloat, 3>; +using GLvec4 = std::array<GLfloat, 4>; + +using GLuvec2 = std::array<GLuint, 2>; +using GLuvec3 = std::array<GLuint, 3>; +using GLuvec4 = std::array<GLuint, 4>; + namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -39,6 +47,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { + case Maxwell::PrimitiveTopology::Triangles: + return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 78b50b227..5e78723a2 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -57,7 +57,7 @@ uniform sampler2D color_texture; void main() { // Swap RGBA -> ABGR so we don't have to do this on the CPU. This needs to change if we have to // support more framebuffer pixel formats. - color = texture(color_texture, frag_tex_coord).abgr; + color = texture(color_texture, frag_tex_coord); } )"; @@ -210,7 +210,7 @@ void RendererOpenGL::InitOpenGLObjects() { 0.0f); // Link shaders and get variable locations - shader.Create(vertex_shader, nullptr, fragment_shader); + shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); state.draw.shader_program = shader.handle; state.Apply(); uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); @@ -311,10 +311,10 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, } std::array<ScreenRectVertex, 4> vertices = {{ - ScreenRectVertex(x, y, texcoords.top, right), - ScreenRectVertex(x + w, y, texcoords.bottom, right), - ScreenRectVertex(x, y + h, texcoords.top, left), - ScreenRectVertex(x + w, y + h, texcoords.bottom, left), + ScreenRectVertex(x, y, texcoords.top, left), + ScreenRectVertex(x + w, y, texcoords.bottom, left), + ScreenRectVertex(x, y + h, texcoords.top, right), + ScreenRectVertex(x + w, y + h, texcoords.bottom, right), }}; state.texture_units[0].texture_2d = screen_info.display_texture; diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c52f40037..2cc6d9a00 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -72,7 +72,7 @@ private: // OpenGL object IDs OGLVertexArray vertex_array; OGLBuffer vertex_buffer; - OGLShader shader; + OGLProgram shader; /// Display information for Switch screen ScreenInfo screen_info; diff --git a/src/video_core/utils.h b/src/video_core/utils.h index be0f7e22b..e0a14d48f 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -151,7 +151,7 @@ static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixe const u32 coarse_y = y & ~127; u32 morton_offset = GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; - u32 gl_pixel_index = (x + (height - 1 - y) * width) * gl_bytes_per_pixel; + u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel; data_ptrs[morton_to_gl] = morton_data + morton_offset; data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 34e33170e..534145f7e 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -776,9 +776,11 @@ void GMainWindow::closeEvent(QCloseEvent* event) { return; } - UISettings::values.geometry = saveGeometry(); + if (ui.action_Fullscreen->isChecked()) { + UISettings::values.geometry = saveGeometry(); + UISettings::values.renderwindow_geometry = render_window->saveGeometry(); + } UISettings::values.state = saveState(); - UISettings::values.renderwindow_geometry = render_window->saveGeometry(); #if MICROPROFILE_ENABLED UISettings::values.microprofile_geometry = microProfileDialog->saveGeometry(); UISettings::values.microprofile_visible = microProfileDialog->isVisible(); |