summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/audio_core/algorithm/interpolate.cpp198
-rw-r--r--src/audio_core/algorithm/interpolate.h9
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/core.cpp8
-rw-r--r--src/core/core.h6
-rw-r--r--src/core/hardware_properties.h2
-rw-r--r--src/core/hle/kernel/kernel.cpp121
-rw-r--r--src/core/hle/kernel/kernel.h37
-rw-r--r--src/core/hle/kernel/scheduler.cpp56
-rw-r--r--src/core/hle/kernel/scheduler.h46
-rw-r--r--src/core/hle/kernel/thread.cpp12
-rw-r--r--src/core/hle/kernel/thread.h6
-rw-r--r--src/core/hle/kernel/time_manager.cpp44
-rw-r--r--src/core/hle/kernel/time_manager.h43
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp10
-rwxr-xr-xsrc/input_common/analog_from_button.cpp14
-rw-r--r--src/video_core/engines/maxwell_3d.h9
-rw-r--r--src/video_core/gpu.cpp65
-rw-r--r--src/video_core/gpu.h7
-rw-r--r--src/video_core/morton.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp36
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp52
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h4
-rw-r--r--src/video_core/shader/decode/texture.cpp2
-rw-r--r--src/video_core/surface.cpp2
-rw-r--r--src/video_core/surface.h72
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/surface_base.cpp4
-rw-r--r--src/video_core/texture_cache/surface_params.cpp47
-rw-r--r--src/video_core/texture_cache/surface_params.h5
-rw-r--r--src/video_core/texture_cache/texture_cache.h24
37 files changed, 740 insertions, 264 deletions
diff --git a/src/audio_core/algorithm/interpolate.cpp b/src/audio_core/algorithm/interpolate.cpp
index 5005ba519..a58f24169 100644
--- a/src/audio_core/algorithm/interpolate.cpp
+++ b/src/audio_core/algorithm/interpolate.cpp
@@ -5,6 +5,7 @@
#define _USE_MATH_DEFINES
#include <algorithm>
+#include <climits>
#include <cmath>
#include <vector>
#include "audio_core/algorithm/interpolate.h"
@@ -13,13 +14,131 @@
namespace AudioCore {
-/// The Lanczos kernel
-static double Lanczos(std::size_t a, double x) {
- if (x == 0.0)
- return 1.0;
- const double px = M_PI * x;
- return a * std::sin(px) * std::sin(px / a) / (px * px);
-}
+constexpr std::array<s16, 512> curve_lut0 = {
+ 6600, 19426, 6722, 3, 6479, 19424, 6845, 9, 6359, 19419, 6968, 15, 6239,
+ 19412, 7093, 22, 6121, 19403, 7219, 28, 6004, 19391, 7345, 34, 5888, 19377,
+ 7472, 41, 5773, 19361, 7600, 48, 5659, 19342, 7728, 55, 5546, 19321, 7857,
+ 62, 5434, 19298, 7987, 69, 5323, 19273, 8118, 77, 5213, 19245, 8249, 84,
+ 5104, 19215, 8381, 92, 4997, 19183, 8513, 101, 4890, 19148, 8646, 109, 4785,
+ 19112, 8780, 118, 4681, 19073, 8914, 127, 4579, 19031, 9048, 137, 4477, 18988,
+ 9183, 147, 4377, 18942, 9318, 157, 4277, 18895, 9454, 168, 4179, 18845, 9590,
+ 179, 4083, 18793, 9726, 190, 3987, 18738, 9863, 202, 3893, 18682, 10000, 215,
+ 3800, 18624, 10137, 228, 3709, 18563, 10274, 241, 3618, 18500, 10411, 255, 3529,
+ 18436, 10549, 270, 3441, 18369, 10687, 285, 3355, 18300, 10824, 300, 3269, 18230,
+ 10962, 317, 3186, 18157, 11100, 334, 3103, 18082, 11238, 351, 3022, 18006, 11375,
+ 369, 2942, 17927, 11513, 388, 2863, 17847, 11650, 408, 2785, 17765, 11788, 428,
+ 2709, 17681, 11925, 449, 2635, 17595, 12062, 471, 2561, 17507, 12198, 494, 2489,
+ 17418, 12334, 517, 2418, 17327, 12470, 541, 2348, 17234, 12606, 566, 2280, 17140,
+ 12741, 592, 2213, 17044, 12876, 619, 2147, 16946, 13010, 647, 2083, 16846, 13144,
+ 675, 2020, 16745, 13277, 704, 1958, 16643, 13409, 735, 1897, 16539, 13541, 766,
+ 1838, 16434, 13673, 798, 1780, 16327, 13803, 832, 1723, 16218, 13933, 866, 1667,
+ 16109, 14062, 901, 1613, 15998, 14191, 937, 1560, 15885, 14318, 975, 1508, 15772,
+ 14445, 1013, 1457, 15657, 14571, 1052, 1407, 15540, 14695, 1093, 1359, 15423, 14819,
+ 1134, 1312, 15304, 14942, 1177, 1266, 15185, 15064, 1221, 1221, 15064, 15185, 1266,
+ 1177, 14942, 15304, 1312, 1134, 14819, 15423, 1359, 1093, 14695, 15540, 1407, 1052,
+ 14571, 15657, 1457, 1013, 14445, 15772, 1508, 975, 14318, 15885, 1560, 937, 14191,
+ 15998, 1613, 901, 14062, 16109, 1667, 866, 13933, 16218, 1723, 832, 13803, 16327,
+ 1780, 798, 13673, 16434, 1838, 766, 13541, 16539, 1897, 735, 13409, 16643, 1958,
+ 704, 13277, 16745, 2020, 675, 13144, 16846, 2083, 647, 13010, 16946, 2147, 619,
+ 12876, 17044, 2213, 592, 12741, 17140, 2280, 566, 12606, 17234, 2348, 541, 12470,
+ 17327, 2418, 517, 12334, 17418, 2489, 494, 12198, 17507, 2561, 471, 12062, 17595,
+ 2635, 449, 11925, 17681, 2709, 428, 11788, 17765, 2785, 408, 11650, 17847, 2863,
+ 388, 11513, 17927, 2942, 369, 11375, 18006, 3022, 351, 11238, 18082, 3103, 334,
+ 11100, 18157, 3186, 317, 10962, 18230, 3269, 300, 10824, 18300, 3355, 285, 10687,
+ 18369, 3441, 270, 10549, 18436, 3529, 255, 10411, 18500, 3618, 241, 10274, 18563,
+ 3709, 228, 10137, 18624, 3800, 215, 10000, 18682, 3893, 202, 9863, 18738, 3987,
+ 190, 9726, 18793, 4083, 179, 9590, 18845, 4179, 168, 9454, 18895, 4277, 157,
+ 9318, 18942, 4377, 147, 9183, 18988, 4477, 137, 9048, 19031, 4579, 127, 8914,
+ 19073, 4681, 118, 8780, 19112, 4785, 109, 8646, 19148, 4890, 101, 8513, 19183,
+ 4997, 92, 8381, 19215, 5104, 84, 8249, 19245, 5213, 77, 8118, 19273, 5323,
+ 69, 7987, 19298, 5434, 62, 7857, 19321, 5546, 55, 7728, 19342, 5659, 48,
+ 7600, 19361, 5773, 41, 7472, 19377, 5888, 34, 7345, 19391, 6004, 28, 7219,
+ 19403, 6121, 22, 7093, 19412, 6239, 15, 6968, 19419, 6359, 9, 6845, 19424,
+ 6479, 3, 6722, 19426, 6600};
+
+constexpr std::array<s16, 512> curve_lut1 = {
+ -68, 32639, 69, -5, -200, 32630, 212, -15, -328, 32613, 359, -26, -450,
+ 32586, 512, -36, -568, 32551, 669, -47, -680, 32507, 832, -58, -788, 32454,
+ 1000, -69, -891, 32393, 1174, -80, -990, 32323, 1352, -92, -1084, 32244, 1536,
+ -103, -1173, 32157, 1724, -115, -1258, 32061, 1919, -128, -1338, 31956, 2118, -140,
+ -1414, 31844, 2322, -153, -1486, 31723, 2532, -167, -1554, 31593, 2747, -180, -1617,
+ 31456, 2967, -194, -1676, 31310, 3192, -209, -1732, 31157, 3422, -224, -1783, 30995,
+ 3657, -240, -1830, 30826, 3897, -256, -1874, 30649, 4143, -272, -1914, 30464, 4393,
+ -289, -1951, 30272, 4648, -307, -1984, 30072, 4908, -325, -2014, 29866, 5172, -343,
+ -2040, 29652, 5442, -362, -2063, 29431, 5716, -382, -2083, 29203, 5994, -403, -2100,
+ 28968, 6277, -424, -2114, 28727, 6565, -445, -2125, 28480, 6857, -468, -2133, 28226,
+ 7153, -490, -2139, 27966, 7453, -514, -2142, 27700, 7758, -538, -2142, 27428, 8066,
+ -563, -2141, 27151, 8378, -588, -2136, 26867, 8694, -614, -2130, 26579, 9013, -641,
+ -2121, 26285, 9336, -668, -2111, 25987, 9663, -696, -2098, 25683, 9993, -724, -2084,
+ 25375, 10326, -753, -2067, 25063, 10662, -783, -2049, 24746, 11000, -813, -2030, 24425,
+ 11342, -844, -2009, 24100, 11686, -875, -1986, 23771, 12033, -907, -1962, 23438, 12382,
+ -939, -1937, 23103, 12733, -972, -1911, 22764, 13086, -1005, -1883, 22422, 13441, -1039,
+ -1855, 22077, 13798, -1072, -1825, 21729, 14156, -1107, -1795, 21380, 14516, -1141, -1764,
+ 21027, 14877, -1176, -1732, 20673, 15239, -1211, -1700, 20317, 15602, -1246, -1667, 19959,
+ 15965, -1282, -1633, 19600, 16329, -1317, -1599, 19239, 16694, -1353, -1564, 18878, 17058,
+ -1388, -1530, 18515, 17423, -1424, -1495, 18151, 17787, -1459, -1459, 17787, 18151, -1495,
+ -1424, 17423, 18515, -1530, -1388, 17058, 18878, -1564, -1353, 16694, 19239, -1599, -1317,
+ 16329, 19600, -1633, -1282, 15965, 19959, -1667, -1246, 15602, 20317, -1700, -1211, 15239,
+ 20673, -1732, -1176, 14877, 21027, -1764, -1141, 14516, 21380, -1795, -1107, 14156, 21729,
+ -1825, -1072, 13798, 22077, -1855, -1039, 13441, 22422, -1883, -1005, 13086, 22764, -1911,
+ -972, 12733, 23103, -1937, -939, 12382, 23438, -1962, -907, 12033, 23771, -1986, -875,
+ 11686, 24100, -2009, -844, 11342, 24425, -2030, -813, 11000, 24746, -2049, -783, 10662,
+ 25063, -2067, -753, 10326, 25375, -2084, -724, 9993, 25683, -2098, -696, 9663, 25987,
+ -2111, -668, 9336, 26285, -2121, -641, 9013, 26579, -2130, -614, 8694, 26867, -2136,
+ -588, 8378, 27151, -2141, -563, 8066, 27428, -2142, -538, 7758, 27700, -2142, -514,
+ 7453, 27966, -2139, -490, 7153, 28226, -2133, -468, 6857, 28480, -2125, -445, 6565,
+ 28727, -2114, -424, 6277, 28968, -2100, -403, 5994, 29203, -2083, -382, 5716, 29431,
+ -2063, -362, 5442, 29652, -2040, -343, 5172, 29866, -2014, -325, 4908, 30072, -1984,
+ -307, 4648, 30272, -1951, -289, 4393, 30464, -1914, -272, 4143, 30649, -1874, -256,
+ 3897, 30826, -1830, -240, 3657, 30995, -1783, -224, 3422, 31157, -1732, -209, 3192,
+ 31310, -1676, -194, 2967, 31456, -1617, -180, 2747, 31593, -1554, -167, 2532, 31723,
+ -1486, -153, 2322, 31844, -1414, -140, 2118, 31956, -1338, -128, 1919, 32061, -1258,
+ -115, 1724, 32157, -1173, -103, 1536, 32244, -1084, -92, 1352, 32323, -990, -80,
+ 1174, 32393, -891, -69, 1000, 32454, -788, -58, 832, 32507, -680, -47, 669,
+ 32551, -568, -36, 512, 32586, -450, -26, 359, 32613, -328, -15, 212, 32630,
+ -200, -5, 69, 32639, -68};
+
+constexpr std::array<s16, 512> curve_lut2 = {
+ 3195, 26287, 3329, -32, 3064, 26281, 3467, -34, 2936, 26270, 3608, -38, 2811,
+ 26253, 3751, -42, 2688, 26230, 3897, -46, 2568, 26202, 4046, -50, 2451, 26169,
+ 4199, -54, 2338, 26130, 4354, -58, 2227, 26085, 4512, -63, 2120, 26035, 4673,
+ -67, 2015, 25980, 4837, -72, 1912, 25919, 5004, -76, 1813, 25852, 5174, -81,
+ 1716, 25780, 5347, -87, 1622, 25704, 5522, -92, 1531, 25621, 5701, -98, 1442,
+ 25533, 5882, -103, 1357, 25440, 6066, -109, 1274, 25342, 6253, -115, 1193, 25239,
+ 6442, -121, 1115, 25131, 6635, -127, 1040, 25018, 6830, -133, 967, 24899, 7027,
+ -140, 897, 24776, 7227, -146, 829, 24648, 7430, -153, 764, 24516, 7635, -159,
+ 701, 24379, 7842, -166, 641, 24237, 8052, -174, 583, 24091, 8264, -181, 526,
+ 23940, 8478, -187, 472, 23785, 8695, -194, 420, 23626, 8914, -202, 371, 23462,
+ 9135, -209, 324, 23295, 9358, -215, 279, 23123, 9583, -222, 236, 22948, 9809,
+ -230, 194, 22769, 10038, -237, 154, 22586, 10269, -243, 117, 22399, 10501, -250,
+ 81, 22208, 10735, -258, 47, 22015, 10970, -265, 15, 21818, 11206, -271, -16,
+ 21618, 11444, -277, -44, 21415, 11684, -283, -71, 21208, 11924, -290, -97, 20999,
+ 12166, -296, -121, 20786, 12409, -302, -143, 20571, 12653, -306, -163, 20354, 12898,
+ -311, -183, 20134, 13143, -316, -201, 19911, 13389, -321, -218, 19686, 13635, -325,
+ -234, 19459, 13882, -328, -248, 19230, 14130, -332, -261, 18998, 14377, -335, -273,
+ 18765, 14625, -337, -284, 18531, 14873, -339, -294, 18295, 15121, -341, -302, 18057,
+ 15369, -341, -310, 17817, 15617, -341, -317, 17577, 15864, -340, -323, 17335, 16111,
+ -340, -328, 17092, 16357, -338, -332, 16848, 16603, -336, -336, 16603, 16848, -332,
+ -338, 16357, 17092, -328, -340, 16111, 17335, -323, -340, 15864, 17577, -317, -341,
+ 15617, 17817, -310, -341, 15369, 18057, -302, -341, 15121, 18295, -294, -339, 14873,
+ 18531, -284, -337, 14625, 18765, -273, -335, 14377, 18998, -261, -332, 14130, 19230,
+ -248, -328, 13882, 19459, -234, -325, 13635, 19686, -218, -321, 13389, 19911, -201,
+ -316, 13143, 20134, -183, -311, 12898, 20354, -163, -306, 12653, 20571, -143, -302,
+ 12409, 20786, -121, -296, 12166, 20999, -97, -290, 11924, 21208, -71, -283, 11684,
+ 21415, -44, -277, 11444, 21618, -16, -271, 11206, 21818, 15, -265, 10970, 22015,
+ 47, -258, 10735, 22208, 81, -250, 10501, 22399, 117, -243, 10269, 22586, 154,
+ -237, 10038, 22769, 194, -230, 9809, 22948, 236, -222, 9583, 23123, 279, -215,
+ 9358, 23295, 324, -209, 9135, 23462, 371, -202, 8914, 23626, 420, -194, 8695,
+ 23785, 472, -187, 8478, 23940, 526, -181, 8264, 24091, 583, -174, 8052, 24237,
+ 641, -166, 7842, 24379, 701, -159, 7635, 24516, 764, -153, 7430, 24648, 829,
+ -146, 7227, 24776, 897, -140, 7027, 24899, 967, -133, 6830, 25018, 1040, -127,
+ 6635, 25131, 1115, -121, 6442, 25239, 1193, -115, 6253, 25342, 1274, -109, 6066,
+ 25440, 1357, -103, 5882, 25533, 1442, -98, 5701, 25621, 1531, -92, 5522, 25704,
+ 1622, -87, 5347, 25780, 1716, -81, 5174, 25852, 1813, -76, 5004, 25919, 1912,
+ -72, 4837, 25980, 2015, -67, 4673, 26035, 2120, -63, 4512, 26085, 2227, -58,
+ 4354, 26130, 2338, -54, 4199, 26169, 2451, -50, 4046, 26202, 2568, -46, 3897,
+ 26230, 2688, -42, 3751, 26253, 2811, -38, 3608, 26270, 2936, -34, 3467, 26281,
+ 3064, -32, 3329, 26287, 3195};
std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input, double ratio) {
if (input.size() < 2)
@@ -30,40 +149,39 @@ std::vector<s16> Interpolate(InterpolationState& state, std::vector<s16> input,
ratio = 1.0;
}
- if (ratio != state.current_ratio) {
- const double cutoff_frequency = std::min(0.5 / ratio, 0.5 * ratio);
- state.nyquist = CascadingFilter::LowPass(std::clamp(cutoff_frequency, 0.0, 0.4), 3);
- state.current_ratio = ratio;
- }
- state.nyquist.Process(input);
-
- constexpr std::size_t taps = InterpolationState::lanczos_taps;
- const std::size_t num_frames = input.size() / 2;
-
- std::vector<s16> output;
- output.reserve(static_cast<std::size_t>(input.size() / ratio + 4));
-
- double& pos = state.position;
- auto& h = state.history;
- for (std::size_t i = 0; i < num_frames; ++i) {
- std::rotate(h.begin(), h.end() - 1, h.end());
- h[0][0] = input[i * 2 + 0];
- h[0][1] = input[i * 2 + 1];
-
- while (pos <= 1.0) {
- double l = 0.0;
- double r = 0.0;
- for (std::size_t j = 0; j < h.size(); j++) {
- const double lanczos_calc = Lanczos(taps, pos + j - taps + 1);
- l += lanczos_calc * h[j][0];
- r += lanczos_calc * h[j][1];
- }
- output.emplace_back(static_cast<s16>(std::clamp(l, -32768.0, 32767.0)));
- output.emplace_back(static_cast<s16>(std::clamp(r, -32768.0, 32767.0)));
-
- pos += ratio;
+ const int step = static_cast<int>(ratio * 0x8000);
+ const std::array<s16, 512>& lut = [step] {
+ if (step > 0xaaaa) {
+ return curve_lut0;
+ }
+ if (step <= 0x8000) {
+ return curve_lut1;
}
- pos -= 1.0;
+ return curve_lut2;
+ }();
+
+ std::vector<s16> output(static_cast<std::size_t>(input.size() / ratio));
+ int in_offset = 0;
+ for (std::size_t out_offset = 0; out_offset < output.size(); out_offset += 2) {
+ const int lut_index = (state.fraction >> 8) * 4;
+
+ const int l = input[(in_offset + 0) * 2 + 0] * lut[lut_index + 0] +
+ input[(in_offset + 1) * 2 + 0] * lut[lut_index + 1] +
+ input[(in_offset + 2) * 2 + 0] * lut[lut_index + 2] +
+ input[(in_offset + 3) * 2 + 0] * lut[lut_index + 3];
+
+ const int r = input[(in_offset + 0) * 2 + 1] * lut[lut_index + 0] +
+ input[(in_offset + 1) * 2 + 1] * lut[lut_index + 1] +
+ input[(in_offset + 2) * 2 + 1] * lut[lut_index + 2] +
+ input[(in_offset + 3) * 2 + 1] * lut[lut_index + 3];
+
+ const int new_offset = state.fraction + step;
+
+ in_offset += new_offset >> 15;
+ state.fraction = new_offset & 0x7fff;
+
+ output[out_offset + 0] = static_cast<s16>(std::clamp(l >> 15, SHRT_MIN, SHRT_MAX));
+ output[out_offset + 1] = static_cast<s16>(std::clamp(r >> 15, SHRT_MIN, SHRT_MAX));
}
return output;
diff --git a/src/audio_core/algorithm/interpolate.h b/src/audio_core/algorithm/interpolate.h
index edbd6460f..1b9831a75 100644
--- a/src/audio_core/algorithm/interpolate.h
+++ b/src/audio_core/algorithm/interpolate.h
@@ -6,19 +6,12 @@
#include <array>
#include <vector>
-#include "audio_core/algorithm/filter.h"
#include "common/common_types.h"
namespace AudioCore {
struct InterpolationState {
- static constexpr std::size_t lanczos_taps = 4;
- static constexpr std::size_t history_size = lanczos_taps * 2 - 1;
-
- double current_ratio = 0.0;
- CascadingFilter nyquist;
- std::array<std::array<s16, 2>, history_size> history = {};
- double position = 0;
+ int fraction = 0;
};
/// Interpolates input signal to produce output signal.
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 26612e692..88c06b2ce 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -187,6 +187,8 @@ add_library(core STATIC
hle/kernel/synchronization.h
hle/kernel/thread.cpp
hle/kernel/thread.h
+ hle/kernel/time_manager.cpp
+ hle/kernel/time_manager.h
hle/kernel/transfer_memory.cpp
hle/kernel/transfer_memory.h
hle/kernel/vm_manager.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 0eb0c0dca..86e314c94 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -707,4 +707,12 @@ const Service::SM::ServiceManager& System::ServiceManager() const {
return *impl->service_manager;
}
+void System::RegisterCoreThread(std::size_t id) {
+ impl->kernel.RegisterCoreThread(id);
+}
+
+void System::RegisterHostThread() {
+ impl->kernel.RegisterHostThread();
+}
+
} // namespace Core
diff --git a/src/core/core.h b/src/core/core.h
index e69d68fcf..8d862a8e6 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -360,6 +360,12 @@ public:
const CurrentBuildProcessID& GetCurrentProcessBuildID() const;
+ /// Register a host thread as an emulated CPU Core.
+ void RegisterCoreThread(std::size_t id);
+
+ /// Register a host thread as an auxiliary thread.
+ void RegisterHostThread();
+
private:
System();
diff --git a/src/core/hardware_properties.h b/src/core/hardware_properties.h
index 213461b6a..b04e046ed 100644
--- a/src/core/hardware_properties.h
+++ b/src/core/hardware_properties.h
@@ -20,6 +20,8 @@ constexpr u32 NUM_CPU_CORES = 4; // Number of CPU Cores
} // namespace Hardware
+constexpr u32 INVALID_HOST_THREAD_ID = 0xFFFFFFFF;
+
struct EmuThreadHandle {
u32 host_handle;
u32 guest_handle;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4eb1d8703..9232f4d7e 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -3,9 +3,12 @@
// Refer to the license.txt file included.
#include <atomic>
+#include <bitset>
#include <functional>
#include <memory>
#include <mutex>
+#include <thread>
+#include <unordered_map>
#include <utility>
#include "common/assert.h"
@@ -15,6 +18,7 @@
#include "core/core.h"
#include "core/core_timing.h"
#include "core/core_timing_util.h"
+#include "core/hardware_properties.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/errors.h"
#include "core/hle/kernel/handle_table.h"
@@ -25,6 +29,7 @@
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/synchronization.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
#include "core/memory.h"
@@ -44,7 +49,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
std::lock_guard lock{HLE::g_hle_lock};
std::shared_ptr<Thread> thread =
- system.Kernel().RetrieveThreadFromWakeupCallbackHandleTable(proper_handle);
+ system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
if (thread == nullptr) {
LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
return;
@@ -97,8 +102,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
}
struct KernelCore::Impl {
- explicit Impl(Core::System& system)
- : system{system}, global_scheduler{system}, synchronization{system} {}
+ explicit Impl(Core::System& system, KernelCore& kernel)
+ : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
void Initialize(KernelCore& kernel) {
Shutdown();
@@ -120,7 +125,7 @@ struct KernelCore::Impl {
system_resource_limit = nullptr;
- thread_wakeup_callback_handle_table.Clear();
+ global_handle_table.Clear();
thread_wakeup_event_type = nullptr;
preemption_event = nullptr;
@@ -138,8 +143,8 @@ struct KernelCore::Impl {
void InitializePhysicalCores() {
exclusive_monitor =
- Core::MakeExclusiveMonitor(system.Memory(), global_scheduler.CpuCoresCount());
- for (std::size_t i = 0; i < global_scheduler.CpuCoresCount(); i++) {
+ Core::MakeExclusiveMonitor(system.Memory(), Core::Hardware::NUM_CPU_CORES);
+ for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
cores.emplace_back(system, i, *exclusive_monitor);
}
}
@@ -184,6 +189,50 @@ struct KernelCore::Impl {
system.Memory().SetCurrentPageTable(*process);
}
+ void RegisterCoreThread(std::size_t core_id) {
+ std::unique_lock lock{register_thread_mutex};
+ const std::thread::id this_id = std::this_thread::get_id();
+ const auto it = host_thread_ids.find(this_id);
+ ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
+ ASSERT(it == host_thread_ids.end());
+ ASSERT(!registered_core_threads[core_id]);
+ host_thread_ids[this_id] = static_cast<u32>(core_id);
+ registered_core_threads.set(core_id);
+ }
+
+ void RegisterHostThread() {
+ std::unique_lock lock{register_thread_mutex};
+ const std::thread::id this_id = std::this_thread::get_id();
+ const auto it = host_thread_ids.find(this_id);
+ ASSERT(it == host_thread_ids.end());
+ host_thread_ids[this_id] = registered_thread_ids++;
+ }
+
+ u32 GetCurrentHostThreadID() const {
+ const std::thread::id this_id = std::this_thread::get_id();
+ const auto it = host_thread_ids.find(this_id);
+ if (it == host_thread_ids.end()) {
+ return Core::INVALID_HOST_THREAD_ID;
+ }
+ return it->second;
+ }
+
+ Core::EmuThreadHandle GetCurrentEmuThreadID() const {
+ Core::EmuThreadHandle result = Core::EmuThreadHandle::InvalidHandle();
+ result.host_handle = GetCurrentHostThreadID();
+ if (result.host_handle >= Core::Hardware::NUM_CPU_CORES) {
+ return result;
+ }
+ const Kernel::Scheduler& sched = cores[result.host_handle].Scheduler();
+ const Kernel::Thread* current = sched.GetCurrentThread();
+ if (current != nullptr) {
+ result.guest_handle = current->GetGlobalHandle();
+ } else {
+ result.guest_handle = InvalidHandle;
+ }
+ return result;
+ }
+
std::atomic<u32> next_object_id{0};
std::atomic<u64> next_kernel_process_id{Process::InitialKIPIDMin};
std::atomic<u64> next_user_process_id{Process::ProcessIDMin};
@@ -194,15 +243,16 @@ struct KernelCore::Impl {
Process* current_process = nullptr;
Kernel::GlobalScheduler global_scheduler;
Kernel::Synchronization synchronization;
+ Kernel::TimeManager time_manager;
std::shared_ptr<ResourceLimit> system_resource_limit;
std::shared_ptr<Core::Timing::EventType> thread_wakeup_event_type;
std::shared_ptr<Core::Timing::EventType> preemption_event;
- // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future,
- // allowing us to simply use a pool index or similar.
- Kernel::HandleTable thread_wakeup_callback_handle_table;
+ // This is the kernel's handle table or supervisor handle table which
+ // stores all the objects in place.
+ Kernel::HandleTable global_handle_table;
/// Map of named ports managed by the kernel, which can be retrieved using
/// the ConnectToPort SVC.
@@ -211,11 +261,17 @@ struct KernelCore::Impl {
std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
std::vector<Kernel::PhysicalCore> cores;
+ // 0-3 IDs represent core threads, >3 represent others
+ std::unordered_map<std::thread::id, u32> host_thread_ids;
+ u32 registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
+ std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads;
+ std::mutex register_thread_mutex;
+
// System context
Core::System& system;
};
-KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {}
+KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system, *this)} {}
KernelCore::~KernelCore() {
Shutdown();
}
@@ -232,9 +288,8 @@ std::shared_ptr<ResourceLimit> KernelCore::GetSystemResourceLimit() const {
return impl->system_resource_limit;
}
-std::shared_ptr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(
- Handle handle) const {
- return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle);
+std::shared_ptr<Thread> KernelCore::RetrieveThreadFromGlobalHandleTable(Handle handle) const {
+ return impl->global_handle_table.Get<Thread>(handle);
}
void KernelCore::AppendNewProcess(std::shared_ptr<Process> process) {
@@ -265,6 +320,14 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const {
return impl->global_scheduler;
}
+Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) {
+ return impl->cores[id].Scheduler();
+}
+
+const Kernel::Scheduler& KernelCore::Scheduler(std::size_t id) const {
+ return impl->cores[id].Scheduler();
+}
+
Kernel::PhysicalCore& KernelCore::PhysicalCore(std::size_t id) {
return impl->cores[id];
}
@@ -281,6 +344,14 @@ const Kernel::Synchronization& KernelCore::Synchronization() const {
return impl->synchronization;
}
+Kernel::TimeManager& KernelCore::TimeManager() {
+ return impl->time_manager;
+}
+
+const Kernel::TimeManager& KernelCore::TimeManager() const {
+ return impl->time_manager;
+}
+
Core::ExclusiveMonitor& KernelCore::GetExclusiveMonitor() {
return *impl->exclusive_monitor;
}
@@ -338,12 +409,28 @@ const std::shared_ptr<Core::Timing::EventType>& KernelCore::ThreadWakeupCallback
return impl->thread_wakeup_event_type;
}
-Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() {
- return impl->thread_wakeup_callback_handle_table;
+Kernel::HandleTable& KernelCore::GlobalHandleTable() {
+ return impl->global_handle_table;
+}
+
+const Kernel::HandleTable& KernelCore::GlobalHandleTable() const {
+ return impl->global_handle_table;
+}
+
+void KernelCore::RegisterCoreThread(std::size_t core_id) {
+ impl->RegisterCoreThread(core_id);
+}
+
+void KernelCore::RegisterHostThread() {
+ impl->RegisterHostThread();
+}
+
+u32 KernelCore::GetCurrentHostThreadID() const {
+ return impl->GetCurrentHostThreadID();
}
-const Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() const {
- return impl->thread_wakeup_callback_handle_table;
+Core::EmuThreadHandle KernelCore::GetCurrentEmuThreadID() const {
+ return impl->GetCurrentEmuThreadID();
}
} // namespace Kernel
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 1eede3063..c4f78ab71 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -11,6 +11,7 @@
#include "core/hle/kernel/object.h"
namespace Core {
+struct EmuThreadHandle;
class ExclusiveMonitor;
class System;
} // namespace Core
@@ -29,8 +30,10 @@ class HandleTable;
class PhysicalCore;
class Process;
class ResourceLimit;
+class Scheduler;
class Synchronization;
class Thread;
+class TimeManager;
/// Represents a single instance of the kernel.
class KernelCore {
@@ -64,7 +67,7 @@ public:
std::shared_ptr<ResourceLimit> GetSystemResourceLimit() const;
/// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table.
- std::shared_ptr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const;
+ std::shared_ptr<Thread> RetrieveThreadFromGlobalHandleTable(Handle handle) const;
/// Adds the given shared pointer to an internal list of active processes.
void AppendNewProcess(std::shared_ptr<Process> process);
@@ -87,6 +90,12 @@ public:
/// Gets the sole instance of the global scheduler
const Kernel::GlobalScheduler& GlobalScheduler() const;
+ /// Gets the sole instance of the Scheduler assoviated with cpu core 'id'
+ Kernel::Scheduler& Scheduler(std::size_t id);
+
+ /// Gets the sole instance of the Scheduler assoviated with cpu core 'id'
+ const Kernel::Scheduler& Scheduler(std::size_t id) const;
+
/// Gets the an instance of the respective physical CPU core.
Kernel::PhysicalCore& PhysicalCore(std::size_t id);
@@ -99,6 +108,12 @@ public:
/// Gets the an instance of the Synchronization Interface.
const Kernel::Synchronization& Synchronization() const;
+ /// Gets the an instance of the TimeManager Interface.
+ Kernel::TimeManager& TimeManager();
+
+ /// Gets the an instance of the TimeManager Interface.
+ const Kernel::TimeManager& TimeManager() const;
+
/// Stops execution of 'id' core, in order to reschedule a new thread.
void PrepareReschedule(std::size_t id);
@@ -120,6 +135,18 @@ public:
/// Determines whether or not the given port is a valid named port.
bool IsValidNamedPort(NamedPortTable::const_iterator port) const;
+ /// Gets the current host_thread/guest_thread handle.
+ Core::EmuThreadHandle GetCurrentEmuThreadID() const;
+
+ /// Gets the current host_thread handle.
+ u32 GetCurrentHostThreadID() const;
+
+ /// Register the current thread as a CPU Core Thread.
+ void RegisterCoreThread(std::size_t core_id);
+
+ /// Register the current thread as a non CPU core thread.
+ void RegisterHostThread();
+
private:
friend class Object;
friend class Process;
@@ -140,11 +167,11 @@ private:
/// Retrieves the event type used for thread wakeup callbacks.
const std::shared_ptr<Core::Timing::EventType>& ThreadWakeupCallbackEventType() const;
- /// Provides a reference to the thread wakeup callback handle table.
- Kernel::HandleTable& ThreadWakeupCallbackHandleTable();
+ /// Provides a reference to the global handle table.
+ Kernel::HandleTable& GlobalHandleTable();
- /// Provides a const reference to the thread wakeup callback handle table.
- const Kernel::HandleTable& ThreadWakeupCallbackHandleTable() const;
+ /// Provides a const reference to the global handle table.
+ const Kernel::HandleTable& GlobalHandleTable() const;
struct Impl;
std::unique_ptr<Impl> impl;
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 86f1421bf..c65f82fb7 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -18,10 +18,11 @@
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/time_manager.h"
namespace Kernel {
-GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} {}
+GlobalScheduler::GlobalScheduler(KernelCore& kernel) : kernel{kernel} {}
GlobalScheduler::~GlobalScheduler() = default;
@@ -35,7 +36,7 @@ void GlobalScheduler::RemoveThread(std::shared_ptr<Thread> thread) {
}
void GlobalScheduler::UnloadThread(std::size_t core) {
- Scheduler& sched = system.Scheduler(core);
+ Scheduler& sched = kernel.Scheduler(core);
sched.UnloadThread();
}
@@ -50,7 +51,7 @@ void GlobalScheduler::SelectThread(std::size_t core) {
sched.is_context_switch_pending = sched.selected_thread != sched.current_thread;
std::atomic_thread_fence(std::memory_order_seq_cst);
};
- Scheduler& sched = system.Scheduler(core);
+ Scheduler& sched = kernel.Scheduler(core);
Thread* current_thread = nullptr;
// Step 1: Get top thread in schedule queue.
current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front();
@@ -356,6 +357,32 @@ void GlobalScheduler::Shutdown() {
thread_list.clear();
}
+void GlobalScheduler::Lock() {
+ Core::EmuThreadHandle current_thread = kernel.GetCurrentEmuThreadID();
+ if (current_thread == current_owner) {
+ ++scope_lock;
+ } else {
+ inner_lock.lock();
+ current_owner = current_thread;
+ ASSERT(current_owner != Core::EmuThreadHandle::InvalidHandle());
+ scope_lock = 1;
+ }
+}
+
+void GlobalScheduler::Unlock() {
+ if (--scope_lock != 0) {
+ ASSERT(scope_lock > 0);
+ return;
+ }
+ for (std::size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
+ SelectThread(i);
+ }
+ current_owner = Core::EmuThreadHandle::InvalidHandle();
+ scope_lock = 1;
+ inner_lock.unlock();
+ // TODO(Blinkhawk): Setup the interrupts and change context on current core.
+}
+
Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, std::size_t core_id)
: system(system), cpu_core(cpu_core), core_id(core_id) {}
@@ -485,4 +512,27 @@ void Scheduler::Shutdown() {
selected_thread = nullptr;
}
+SchedulerLock::SchedulerLock(KernelCore& kernel) : kernel{kernel} {
+ kernel.GlobalScheduler().Lock();
+}
+
+SchedulerLock::~SchedulerLock() {
+ kernel.GlobalScheduler().Unlock();
+}
+
+SchedulerLockAndSleep::SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle,
+ Thread* time_task, s64 nanoseconds)
+ : SchedulerLock{kernel}, event_handle{event_handle}, time_task{time_task}, nanoseconds{
+ nanoseconds} {
+ event_handle = InvalidHandle;
+}
+
+SchedulerLockAndSleep::~SchedulerLockAndSleep() {
+ if (sleep_cancelled) {
+ return;
+ }
+ auto& time_manager = kernel.TimeManager();
+ time_manager.ScheduleTimeEvent(event_handle, time_task, nanoseconds);
+}
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 96db049cb..1c93a838c 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -6,6 +6,7 @@
#include <atomic>
#include <memory>
+#include <mutex>
#include <vector>
#include "common/common_types.h"
@@ -20,11 +21,13 @@ class System;
namespace Kernel {
+class KernelCore;
class Process;
+class SchedulerLock;
class GlobalScheduler final {
public:
- explicit GlobalScheduler(Core::System& system);
+ explicit GlobalScheduler(KernelCore& kernel);
~GlobalScheduler();
/// Adds a new thread to the scheduler
@@ -138,6 +141,14 @@ public:
void Shutdown();
private:
+ friend class SchedulerLock;
+
+ /// Lock the scheduler to the current thread.
+ void Lock();
+
+ /// Unlocks the scheduler, reselects threads, interrupts cores for rescheduling
+ /// and reschedules current core if needed.
+ void Unlock();
/**
* Transfers a thread into an specific core. If the destination_core is -1
* it will be unscheduled from its source code and added into its suggested
@@ -158,9 +169,14 @@ private:
// ordered from Core 0 to Core 3.
std::array<u32, Core::Hardware::NUM_CPU_CORES> preemption_priorities = {59, 59, 59, 62};
+ /// Scheduler lock mechanisms.
+ std::mutex inner_lock{}; // TODO(Blinkhawk): Replace for a SpinLock
+ std::atomic<s64> scope_lock{};
+ Core::EmuThreadHandle current_owner{Core::EmuThreadHandle::InvalidHandle()};
+
/// Lists all thread ids that aren't deleted/etc.
std::vector<std::shared_ptr<Thread>> thread_list;
- Core::System& system;
+ KernelCore& kernel;
};
class Scheduler final {
@@ -227,4 +243,30 @@ private:
bool is_context_switch_pending = false;
};
+class SchedulerLock {
+public:
+ explicit SchedulerLock(KernelCore& kernel);
+ ~SchedulerLock();
+
+protected:
+ KernelCore& kernel;
+};
+
+class SchedulerLockAndSleep : public SchedulerLock {
+public:
+ explicit SchedulerLockAndSleep(KernelCore& kernel, Handle& event_handle, Thread* time_task,
+ s64 nanoseconds);
+ ~SchedulerLockAndSleep();
+
+ void CancelSleep() {
+ sleep_cancelled = true;
+ }
+
+private:
+ Handle& event_handle;
+ Thread* time_task;
+ s64 nanoseconds;
+ bool sleep_cancelled{};
+};
+
} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index ae5f2c8bd..bf850e0b2 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -46,9 +46,9 @@ Thread::~Thread() = default;
void Thread::Stop() {
// Cancel any outstanding wakeup events for this thread
Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
- callback_handle);
- kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle);
- callback_handle = 0;
+ global_handle);
+ kernel.GlobalHandleTable().Close(global_handle);
+ global_handle = 0;
SetStatus(ThreadStatus::Dead);
Signal();
@@ -73,12 +73,12 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
// thread-safe version of ScheduleEvent.
const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
Core::System::GetInstance().CoreTiming().ScheduleEvent(
- cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle);
+ cycles, kernel.ThreadWakeupCallbackEventType(), global_handle);
}
void Thread::CancelWakeupTimer() {
Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(),
- callback_handle);
+ global_handle);
}
void Thread::ResumeFromWait() {
@@ -190,7 +190,7 @@ ResultVal<std::shared_ptr<Thread>> Thread::Create(KernelCore& kernel, std::strin
thread->condvar_wait_address = 0;
thread->wait_handle = 0;
thread->name = std::move(name);
- thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
+ thread->global_handle = kernel.GlobalHandleTable().Create(thread).Unwrap();
thread->owner_process = &owner_process;
auto& scheduler = kernel.GlobalScheduler();
scheduler.AddThread(thread);
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 7a4916318..129e7858a 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -453,6 +453,10 @@ public:
is_sync_cancelled = value;
}
+ Handle GetGlobalHandle() const {
+ return global_handle;
+ }
+
private:
void SetSchedulingStatus(ThreadSchedStatus new_status);
void SetCurrentPriority(u32 new_priority);
@@ -514,7 +518,7 @@ private:
VAddr arb_wait_address{0};
/// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
- Handle callback_handle = 0;
+ Handle global_handle = 0;
/// Callback that will be invoked when the thread is resumed from a waiting state. If the thread
/// was waiting via WaitSynchronization then the object will be the last object that became
diff --git a/src/core/hle/kernel/time_manager.cpp b/src/core/hle/kernel/time_manager.cpp
new file mode 100644
index 000000000..21b290468
--- /dev/null
+++ b/src/core/hle/kernel/time_manager.cpp
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/core_timing_util.h"
+#include "core/hle/kernel/handle_table.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/kernel/time_manager.h"
+
+namespace Kernel {
+
+TimeManager::TimeManager(Core::System& system) : system{system} {
+ time_manager_event_type = Core::Timing::CreateEvent(
+ "Kernel::TimeManagerCallback", [this](u64 thread_handle, [[maybe_unused]] s64 cycles_late) {
+ Handle proper_handle = static_cast<Handle>(thread_handle);
+ std::shared_ptr<Thread> thread =
+ this->system.Kernel().RetrieveThreadFromGlobalHandleTable(proper_handle);
+ thread->ResumeFromWait();
+ });
+}
+
+void TimeManager::ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds) {
+ if (nanoseconds > 0) {
+ ASSERT(timetask);
+ event_handle = timetask->GetGlobalHandle();
+ const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds});
+ system.CoreTiming().ScheduleEvent(cycles, time_manager_event_type, event_handle);
+ } else {
+ event_handle = InvalidHandle;
+ }
+}
+
+void TimeManager::UnscheduleTimeEvent(Handle event_handle) {
+ if (event_handle == InvalidHandle) {
+ return;
+ }
+ system.CoreTiming().UnscheduleEvent(time_manager_event_type, event_handle);
+}
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/time_manager.h b/src/core/hle/kernel/time_manager.h
new file mode 100644
index 000000000..eaec486d1
--- /dev/null
+++ b/src/core/hle/kernel/time_manager.h
@@ -0,0 +1,43 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "core/hle/kernel/object.h"
+
+namespace Core {
+class System;
+} // namespace Core
+
+namespace Core::Timing {
+struct EventType;
+} // namespace Core::Timing
+
+namespace Kernel {
+
+class Thread;
+
+/**
+ * The `TimeManager` takes care of scheduling time events on threads and executes their TimeUp
+ * method when the event is triggered.
+ */
+class TimeManager {
+public:
+ explicit TimeManager(Core::System& system);
+
+ /// Schedule a time event on `timetask` thread that will expire in 'nanoseconds'
+ /// returns a non-invalid handle in `event_handle` if correctly scheduled
+ void ScheduleTimeEvent(Handle& event_handle, Thread* timetask, s64 nanoseconds);
+
+ /// Unschedule an existing time event
+ void UnscheduleTimeEvent(Handle event_handle);
+
+private:
+ Core::System& system;
+ std::shared_ptr<Core::Timing::EventType> time_manager_event_type;
+};
+
+} // namespace Kernel
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 15c09f04c..c1e32b28c 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -287,13 +287,13 @@ void Controller_NPad::RequestPadStateUpdate(u32 npad_id) {
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Left)]->GetAnalogDirectionStatus(
Input::AnalogDirection::DOWN));
- pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
- ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT));
- pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
- ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT));
pad_state.r_stick_right.Assign(
analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
- ->GetAnalogDirectionStatus(Input::AnalogDirection::UP));
+ ->GetAnalogDirectionStatus(Input::AnalogDirection::RIGHT));
+ pad_state.r_stick_left.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
+ ->GetAnalogDirectionStatus(Input::AnalogDirection::LEFT));
+ pad_state.r_stick_up.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
+ ->GetAnalogDirectionStatus(Input::AnalogDirection::UP));
pad_state.r_stick_down.Assign(analog_state[static_cast<std::size_t>(JoystickId::Joystick_Right)]
->GetAnalogDirectionStatus(Input::AnalogDirection::DOWN));
diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp
index e1a260762..6cabdaa3c 100755
--- a/src/input_common/analog_from_button.cpp
+++ b/src/input_common/analog_from_button.cpp
@@ -34,6 +34,20 @@ public:
y * coef * (x == 0 ? 1.0f : SQRT_HALF));
}
+ bool GetAnalogDirectionStatus(Input::AnalogDirection direction) const override {
+ switch (direction) {
+ case Input::AnalogDirection::RIGHT:
+ return right->GetStatus();
+ case Input::AnalogDirection::LEFT:
+ return left->GetStatus();
+ case Input::AnalogDirection::UP:
+ return up->GetStatus();
+ case Input::AnalogDirection::DOWN:
+ return down->GetStatus();
+ }
+ return false;
+ }
+
private:
Button up;
Button down;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 26939be3f..6ea7cc6a5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -542,7 +542,7 @@ public:
BitField<12, 1, InvMemoryLayout> type;
} memory_layout;
union {
- BitField<0, 16, u32> array_mode;
+ BitField<0, 16, u32> layers;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
@@ -800,8 +800,12 @@ public:
u32 zeta_width;
u32 zeta_height;
+ union {
+ BitField<0, 16, u32> zeta_layers;
+ BitField<16, 1, u32> zeta_volume;
+ };
- INSERT_UNION_PADDING_WORDS(0x27);
+ INSERT_UNION_PADDING_WORDS(0x26);
u32 depth_test_enable;
@@ -1507,6 +1511,7 @@ ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
+ASSERT_REG_POSITION(zeta_layers, 0x48c);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 7d7137109..e8f763ce9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -140,71 +140,6 @@ void GPU::FlushCommands() {
renderer.Rasterizer().FlushCommands();
}
-u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
- ASSERT(format != RenderTargetFormat::NONE);
-
- switch (format) {
- case RenderTargetFormat::RGBA32_FLOAT:
- case RenderTargetFormat::RGBA32_UINT:
- return 16;
- case RenderTargetFormat::RGBA16_UINT:
- case RenderTargetFormat::RGBA16_UNORM:
- case RenderTargetFormat::RGBA16_FLOAT:
- case RenderTargetFormat::RGBX16_FLOAT:
- case RenderTargetFormat::RG32_FLOAT:
- case RenderTargetFormat::RG32_UINT:
- return 8;
- case RenderTargetFormat::RGBA8_UNORM:
- case RenderTargetFormat::RGBA8_SNORM:
- case RenderTargetFormat::RGBA8_SRGB:
- case RenderTargetFormat::RGBA8_UINT:
- case RenderTargetFormat::RGB10_A2_UNORM:
- case RenderTargetFormat::BGRA8_UNORM:
- case RenderTargetFormat::BGRA8_SRGB:
- case RenderTargetFormat::RG16_UNORM:
- case RenderTargetFormat::RG16_SNORM:
- case RenderTargetFormat::RG16_UINT:
- case RenderTargetFormat::RG16_SINT:
- case RenderTargetFormat::RG16_FLOAT:
- case RenderTargetFormat::R32_FLOAT:
- case RenderTargetFormat::R11G11B10_FLOAT:
- case RenderTargetFormat::R32_UINT:
- return 4;
- case RenderTargetFormat::R16_UNORM:
- case RenderTargetFormat::R16_SNORM:
- case RenderTargetFormat::R16_UINT:
- case RenderTargetFormat::R16_SINT:
- case RenderTargetFormat::R16_FLOAT:
- case RenderTargetFormat::RG8_UNORM:
- case RenderTargetFormat::RG8_SNORM:
- return 2;
- case RenderTargetFormat::R8_UNORM:
- case RenderTargetFormat::R8_UINT:
- return 1;
- default:
- UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
- return 1;
- }
-}
-
-u32 DepthFormatBytesPerPixel(DepthFormat format) {
- switch (format) {
- case DepthFormat::Z32_S8_X24_FLOAT:
- return 8;
- case DepthFormat::Z32_FLOAT:
- case DepthFormat::S8_Z24_UNORM:
- case DepthFormat::Z24_X8_UNORM:
- case DepthFormat::Z24_S8_UNORM:
- case DepthFormat::Z24_C8_UNORM:
- return 4;
- case DepthFormat::Z16_UNORM:
- return 2;
- default:
- UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format));
- return 1;
- }
-}
-
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 07727210c..ba8c9d665 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -57,6 +57,7 @@ enum class RenderTargetFormat : u32 {
RG16_UINT = 0xDD,
RG16_FLOAT = 0xDE,
R11G11B10_FLOAT = 0xE0,
+ R32_SINT = 0xE3,
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
B5G6R5_UNORM = 0xE8,
@@ -82,12 +83,6 @@ enum class DepthFormat : u32 {
Z32_S8_X24_FLOAT = 0x19,
};
-/// Returns the number of bytes per pixel of each rendertarget format.
-u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
-
-/// Returns the number of bytes per pixel of each depth format.
-u32 DepthFormatBytesPerPixel(DepthFormat format);
-
struct CommandListHeader;
class DebugContext;
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 2f2fe6859..f2c83266e 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -85,6 +85,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::RGBX16F>,
MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::R32I>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
@@ -166,6 +167,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::RGBX16F>,
MortonCopy<false, PixelFormat::R32UI>,
+ MortonCopy<false, PixelFormat::R32I>,
nullptr,
nullptr,
nullptr,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index d4b81cd87..cf934b0d8 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -87,6 +87,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
+ {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
@@ -260,6 +261,13 @@ CachedSurface::~CachedSurface() = default;
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
MICROPROFILE_SCOPE(OpenGL_Texture_Download);
+ if (params.IsBuffer()) {
+ glGetNamedBufferSubData(texture_buffer.handle, 0,
+ static_cast<GLsizeiptr>(params.GetHostSizeInBytes()),
+ staging_buffer.data());
+ return;
+ }
+
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
for (u32 level = 0; level < params.emulated_levels; ++level) {
@@ -398,24 +406,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
CachedSurfaceView::~CachedSurfaceView() = default;
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
- ASSERT(params.num_layers == 1 && params.num_levels == 1);
+ ASSERT(params.num_levels == 1);
- const auto& owner_params = surface.GetSurfaceParams();
+ const GLuint texture = surface.GetTexture();
+ if (params.num_layers > 1) {
+ // Layered framebuffer attachments
+ UNIMPLEMENTED_IF(params.base_layer != 0);
+
+ switch (params.target) {
+ case SurfaceTarget::Texture2DArray:
+ glFramebufferTexture(target, attachment, texture, params.base_level);
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ return;
+ }
- switch (owner_params.target) {
+ const GLenum view_target = surface.GetTarget();
+ switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
+ glFramebufferTextureLayer(target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 331808113..ef66dd141 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -159,12 +159,13 @@ struct FormatTuple {
{vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI
{vk::Format::eUndefined, {}}, // RGBX16F
{vk::Format::eR32Uint, Attachable | Storage}, // R32UI
+ {vk::Format::eR32Sint, Attachable | Storage}, // R32I
{vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8
{vk::Format::eUndefined, {}}, // ASTC_2D_8X5
{vk::Format::eUndefined, {}}, // ASTC_2D_5X4
{vk::Format::eUndefined, {}}, // BGRA8_SRGB
{vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB
- {vk::Format::eUndefined, {}}, // DXT23_SRGB
+ {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB
{vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB
{vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB
{vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U
@@ -363,6 +364,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR8G8B8A8Uint;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Uint;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return vk::Format::eR32G32B32A32Uint;
default:
break;
}
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 588a6835f..886bde3b9 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,6 +107,8 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
+ features.shaderStorageImageReadWithoutFormat =
+ is_shader_storage_img_read_without_format_supported;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
@@ -465,6 +467,8 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
+ is_shader_storage_img_read_without_format_supported =
+ supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
@@ -519,6 +523,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR32Uint,
+ vk::Format::eR32Sint,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eB8G8R8A8Unorm,
@@ -538,6 +543,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eBc6HUfloatBlock,
vk::Format::eBc6HSfloatBlock,
vk::Format::eBc1RgbaSrgbBlock,
+ vk::Format::eBc2SrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4SrgbBlock,
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 72603f9f6..2c27ad730 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,6 +122,11 @@ public:
return properties.limits.maxPushConstantsSize;
}
+ /// Returns true if Shader storage Image Read Without Format supported.
+ bool IsShaderStorageImageReadWithoutFormatSupported() const {
+ return is_shader_storage_img_read_without_format_supported;
+ }
+
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -227,6 +232,8 @@ private:
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
+ bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
+ ///< image read without format
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 31c078f6a..3bf86da87 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -611,33 +611,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
vk::RenderPass renderpass) {
FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
- std::numeric_limits<u32>::max()};
+ std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
- const auto MarkAsModifiedAndPush = [&](const View& view) {
- if (view == nullptr) {
+ const auto try_push = [&](const View& view) {
+ if (!view) {
return false;
}
key.views.push_back(view->GetHandle());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
+ key.layers = std::min(key.layers, view->GetNumLayers());
return true;
};
for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
- if (MarkAsModifiedAndPush(color_attachments[index])) {
+ if (try_push(color_attachments[index])) {
texture_cache.MarkColorBufferInUse(index);
}
}
- if (MarkAsModifiedAndPush(zeta_attachment)) {
+ if (try_push(zeta_attachment)) {
texture_cache.MarkDepthBufferInUse();
}
const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
auto& framebuffer = fbentry->second;
if (is_cache_miss) {
- const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass,
- static_cast<u32>(key.views.size()),
- key.views.data(), key.width, key.height, 1);
+ const vk::FramebufferCreateInfo framebuffer_ci(
+ {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width,
+ key.height, key.layers);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 138903d60..4dc8af6e8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -56,6 +56,7 @@ struct FramebufferCacheKey {
vk::RenderPass renderpass{};
u32 width = 0;
u32 height = 0;
+ u32 layers = 0;
ImageViewsPack views;
std::size_t Hash() const noexcept {
@@ -66,12 +67,17 @@ struct FramebufferCacheKey {
}
boost::hash_combine(hash, width);
boost::hash_combine(hash, height);
+ boost::hash_combine(hash, layers);
return hash;
}
bool operator==(const FramebufferCacheKey& rhs) const noexcept {
- return std::tie(renderpass, views, width, height) ==
- std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
+ return std::tie(renderpass, views, width, height, layers) ==
+ std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
+ }
+
+ bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
+ return !operator==(rhs);
}
};
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 0a8ec8398..204b7c39c 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -23,7 +23,14 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4>
} else if (color == std::array<float, 4>{1, 1, 1, 1}) {
return vk::BorderColor::eFloatOpaqueWhite;
} else {
- return {};
+ if (color[0] + color[1] + color[2] > 1.35f) {
+ // If color elements are brighter than roughly 0.5 average, use white border
+ return vk::BorderColor::eFloatOpaqueWhite;
+ }
+ if (color[3] > 0.5f) {
+ return vk::BorderColor::eFloatOpaqueBlack;
+ }
+ return vk::BorderColor::eFloatTransparentBlack;
}
}
@@ -37,8 +44,6 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
const auto border_color{tsc.GetBorderColor()};
const auto vk_border_color{TryConvertBorderColor(border_color)};
- UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
- border_color[0], border_color[1], border_color[2], border_color[3]);
constexpr bool unnormalized_coords{false};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index f64f5da28..2da622d15 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -86,6 +86,7 @@ struct AttributeType {
struct VertexIndices {
std::optional<u32> position;
+ std::optional<u32> layer;
std::optional<u32> viewport;
std::optional<u32> point_size;
std::optional<u32> clip_distances;
@@ -284,14 +285,20 @@ public:
AddExtension("SPV_KHR_variable_pointers");
AddExtension("SPV_KHR_shader_draw_parameters");
- if (ir.UsesViewportIndex()) {
- AddCapability(spv::Capability::MultiViewport);
- if (device.IsExtShaderViewportIndexLayerSupported()) {
+ if (ir.UsesLayer() || ir.UsesViewportIndex()) {
+ if (ir.UsesViewportIndex()) {
+ AddCapability(spv::Capability::MultiViewport);
+ }
+ if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {
AddExtension("SPV_EXT_shader_viewport_index_layer");
AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
}
}
+ if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ AddCapability(spv::Capability::StorageImageReadWithoutFormat);
+ }
+
if (device.IsFloat16Supported()) {
AddCapability(spv::Capability::Float16);
}
@@ -924,13 +931,22 @@ private:
VertexIndices indices;
indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position");
+ if (ir.UsesLayer()) {
+ if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
+ indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer");
+ } else {
+ LOG_ERROR(
+ Render_Vulkan,
+ "Shader requires Layer but it's not supported on this stage with this device.");
+ }
+ }
+
if (ir.UsesViewportIndex()) {
if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");
} else {
- LOG_ERROR(Render_Vulkan,
- "Shader requires ViewportIndex but it's not supported on this "
- "stage with this device.");
+ LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on "
+ "this stage with this device.");
}
}
@@ -1292,6 +1308,13 @@ private:
}
case Attribute::Index::LayerViewportPointSize:
switch (element) {
+ case 1: {
+ if (!out_indices.layer) {
+ return {};
+ }
+ const u32 index = out_indices.layer.value();
+ return {AccessElement(t_out_int, out_vertex, index), Type::Int};
+ }
case 2: {
if (!out_indices.viewport) {
return {};
@@ -1362,6 +1385,11 @@ private:
UNIMPLEMENTED();
}
+ if (!target.id) {
+ // On failure we return a nullptr target.id, skip these stores.
+ return {};
+ }
+
OpStore(target.id, As(Visit(src), target.type));
return {};
}
@@ -1755,8 +1783,16 @@ private:
}
Expression ImageLoad(Operation operation) {
- UNIMPLEMENTED();
- return {};
+ if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ return {v_float_zero, Type::Float};
+ }
+
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
+
+ const Id coords = GetCoordinates(operation, Type::Int);
+ const Id texel = OpImageRead(t_uint4, GetImage(operation), coords);
+
+ return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};
}
Expression ImageStore(Operation operation) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index d3edbe80c..22e3d34de 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -151,6 +151,10 @@ public:
return params.GetMipHeight(base_level);
}
+ u32 GetNumLayers() const {
+ return num_layers;
+ }
+
bool IsBufferView() const {
return buffer_view;
}
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 542636430..bee7d8cad 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -527,7 +527,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
const bool is_bindless = bindless_reg.has_value();
UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow);
- ASSERT_MSG(texture_type != TextureType::Texture3D || is_array || is_shadow,
+ ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
"Illegal texture type");
const SamplerInfo info{texture_type, is_array, is_shadow, false};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1655ccf16..9707c353d 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -155,6 +155,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::R16I;
case Tegra::RenderTargetFormat::R32_FLOAT:
return PixelFormat::R32F;
+ case Tegra::RenderTargetFormat::R32_SINT:
+ return PixelFormat::R32I;
case Tegra::RenderTargetFormat::R32_UINT:
return PixelFormat::R32UI;
case Tegra::RenderTargetFormat::RG32_UINT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 0d17a93ed..d88109e5a 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -59,47 +59,48 @@ enum class PixelFormat {
RG32UI = 41,
RGBX16F = 42,
R32UI = 43,
- ASTC_2D_8X8 = 44,
- ASTC_2D_8X5 = 45,
- ASTC_2D_5X4 = 46,
- BGRA8_SRGB = 47,
- DXT1_SRGB = 48,
- DXT23_SRGB = 49,
- DXT45_SRGB = 50,
- BC7U_SRGB = 51,
- R4G4B4A4U = 52,
- ASTC_2D_4X4_SRGB = 53,
- ASTC_2D_8X8_SRGB = 54,
- ASTC_2D_8X5_SRGB = 55,
- ASTC_2D_5X4_SRGB = 56,
- ASTC_2D_5X5 = 57,
- ASTC_2D_5X5_SRGB = 58,
- ASTC_2D_10X8 = 59,
- ASTC_2D_10X8_SRGB = 60,
- ASTC_2D_6X6 = 61,
- ASTC_2D_6X6_SRGB = 62,
- ASTC_2D_10X10 = 63,
- ASTC_2D_10X10_SRGB = 64,
- ASTC_2D_12X12 = 65,
- ASTC_2D_12X12_SRGB = 66,
- ASTC_2D_8X6 = 67,
- ASTC_2D_8X6_SRGB = 68,
- ASTC_2D_6X5 = 69,
- ASTC_2D_6X5_SRGB = 70,
- E5B9G9R9F = 71,
+ R32I = 44,
+ ASTC_2D_8X8 = 45,
+ ASTC_2D_8X5 = 46,
+ ASTC_2D_5X4 = 47,
+ BGRA8_SRGB = 48,
+ DXT1_SRGB = 49,
+ DXT23_SRGB = 50,
+ DXT45_SRGB = 51,
+ BC7U_SRGB = 52,
+ R4G4B4A4U = 53,
+ ASTC_2D_4X4_SRGB = 54,
+ ASTC_2D_8X8_SRGB = 55,
+ ASTC_2D_8X5_SRGB = 56,
+ ASTC_2D_5X4_SRGB = 57,
+ ASTC_2D_5X5 = 58,
+ ASTC_2D_5X5_SRGB = 59,
+ ASTC_2D_10X8 = 60,
+ ASTC_2D_10X8_SRGB = 61,
+ ASTC_2D_6X6 = 62,
+ ASTC_2D_6X6_SRGB = 63,
+ ASTC_2D_10X10 = 64,
+ ASTC_2D_10X10_SRGB = 65,
+ ASTC_2D_12X12 = 66,
+ ASTC_2D_12X12_SRGB = 67,
+ ASTC_2D_8X6 = 68,
+ ASTC_2D_8X6_SRGB = 69,
+ ASTC_2D_6X5 = 70,
+ ASTC_2D_6X5_SRGB = 71,
+ E5B9G9R9F = 72,
MaxColorFormat,
// Depth formats
- Z32F = 72,
- Z16 = 73,
+ Z32F = 73,
+ Z16 = 74,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 74,
- S8Z24 = 75,
- Z32FS8 = 76,
+ Z24S8 = 75,
+ S8Z24 = 76,
+ Z32FS8 = 77,
MaxDepthStencilFormat,
@@ -171,6 +172,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // RG32UI
0, // RGBX16F
0, // R32UI
+ 0, // R32I
2, // ASTC_2D_8X8
2, // ASTC_2D_8X5
2, // ASTC_2D_5X4
@@ -267,6 +269,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // RG32UI
1, // RGBX16F
1, // R32UI
+ 1, // R32I
8, // ASTC_2D_8X8
8, // ASTC_2D_8X5
5, // ASTC_2D_5X4
@@ -355,6 +358,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // RG32UI
1, // RGBX16F
1, // R32UI
+ 1, // R32I
8, // ASTC_2D_8X8
5, // ASTC_2D_8X5
4, // ASTC_2D_5X4
@@ -443,6 +447,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
64, // RG32UI
64, // RGBX16F
32, // R32UI
+ 32, // R32I
128, // ASTC_2D_8X8
128, // ASTC_2D_8X5
128, // ASTC_2D_5X4
@@ -546,6 +551,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // RG32UI
SurfaceCompression::None, // RGBX16F
SurfaceCompression::None, // R32UI
+ SurfaceCompression::None, // R32I
SurfaceCompression::Converted, // ASTC_2D_8X8
SurfaceCompression::Converted, // ASTC_2D_8X5
SurfaceCompression::Converted, // ASTC_2D_5X4
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 81fb9f633..cc3ad8417 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 74> DefinitionTable = {{
+constexpr std::array<Table, 75> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -89,6 +89,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
{TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
{TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
+ {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},
{TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 84469b7ba..002df414f 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -277,6 +277,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
staging_buffer.data() + host_offset, level);
}
+ } else if (params.IsBuffer()) {
+ // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
+ // memory.
+ std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
} else {
ASSERT(params.target == SurfaceTarget::Texture2D);
ASSERT(params.num_levels == 1);
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 38b3a4ba8..f00839313 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
case PixelFormat::R16U:
- case PixelFormat::R16F: {
+ case PixelFormat::R16F:
params.pixel_format = PixelFormat::Z16;
break;
- }
- case PixelFormat::R32F: {
+ case PixelFormat::R32F:
params.pixel_format = PixelFormat::Z32F;
break;
- }
- default: {
+ default:
UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
static_cast<u32>(params.pixel_format));
}
- }
params.type = GetFormatType(params.pixel_format);
}
params.type = GetFormatType(params.pixel_format);
@@ -168,27 +165,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
return params;
}
-SurfaceParams SurfaceParams::CreateForDepthBuffer(
- Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
- u32 block_width, u32 block_height, u32 block_depth,
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
+SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type;
SurfaceParams params;
- params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+ params.is_tiled = regs.zeta.memory_layout.type ==
+ Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.srgb_conversion = false;
- params.block_width = std::min(block_width, 5U);
- params.block_height = std::min(block_height, 5U);
- params.block_depth = std::min(block_depth, 5U);
+ params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U);
+ params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U);
+ params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromDepthFormat(format);
+ params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
params.type = GetFormatType(params.pixel_format);
- params.width = zeta_width;
- params.height = zeta_height;
- params.target = SurfaceTarget::Texture2D;
- params.depth = 1;
+ params.width = regs.zeta_width;
+ params.height = regs.zeta_height;
params.pitch = 0;
params.num_levels = 1;
params.emulated_levels = 1;
- params.is_layered = false;
+
+ const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0;
+ params.is_layered = is_layered;
+ params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+ params.depth = is_layered ? regs.zeta_layers.Value() : 1U;
return params;
}
@@ -214,11 +213,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.width = params.pitch / bpp;
}
params.height = config.height;
- params.depth = 1;
- params.target = SurfaceTarget::Texture2D;
params.num_levels = 1;
params.emulated_levels = 1;
- params.is_layered = false;
+
+ const bool is_layered = config.layers > 1 && params.block_depth == 0;
+ params.is_layered = is_layered;
+ params.depth = is_layered ? config.layers.Value() : 1;
+ params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
return params;
}
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 9256fd6d9..995cc3818 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -35,10 +35,7 @@ public:
const VideoCommon::Shader::Image& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(
- Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
- u32 block_width, u32 block_height, u32 block_depth,
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
+ static SurfaceParams CreateForDepthBuffer(Core::System& system);
/// Creates SurfaceCachedParams from a framebuffer configuration.
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f4c015635..c70e4aec2 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -160,10 +160,7 @@ public:
SetEmptyDepthBuffer();
return {};
}
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(
- system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
- regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
- regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
+ const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@@ -721,7 +718,6 @@ private:
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
const SurfaceParams& params, bool preserve_contents,
bool is_render) {
-
// Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done.
@@ -733,14 +729,18 @@ private:
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
+
const auto struct_result = current_surface->MatchesStructure(params);
- if (struct_result != MatchStructureResult::None &&
- (params.target != SurfaceTarget::Texture3D ||
- current_surface->MatchTarget(params.target))) {
- if (struct_result == MatchStructureResult::FullMatch) {
- return ManageStructuralMatch(current_surface, params, is_render);
- } else {
- return RebuildSurface(current_surface, params, is_render);
+ if (struct_result != MatchStructureResult::None) {
+ const auto& old_params = current_surface->GetSurfaceParams();
+ const bool not_3d = params.target != SurfaceTarget::Texture3D &&
+ old_params.target != SurfaceTarget::Texture3D;
+ if (not_3d || current_surface->MatchTarget(params.target)) {
+ if (struct_result == MatchStructureResult::FullMatch) {
+ return ManageStructuralMatch(current_surface, params, is_render);
+ } else {
+ return RebuildSurface(current_surface, params, is_render);
+ }
}
}
}