summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-x.ci/scripts/clang/docker.sh18
-rw-r--r--.ci/scripts/clang/exec.sh8
-rw-r--r--.ci/scripts/clang/upload.sh20
-rw-r--r--.ci/templates/build-standard.yml5
-rw-r--r--externals/glad/include/glad/glad.h3
-rw-r--r--externals/glad/src/glad.c2
-rw-r--r--src/common/uint128.h20
-rw-r--r--src/common/wall_clock.cpp15
-rw-r--r--src/core/core.cpp3
-rw-r--r--src/core/hle/kernel/kernel.cpp9
-rw-r--r--src/core/hle/service/acc/acc.cpp18
-rw-r--r--src/core/hle/service/hid/hid.cpp123
-rw-r--r--src/core/hle/service/hid/hid.h10
-rw-r--r--src/core/hle/service/time/time_manager.cpp4
-rw-r--r--src/core/hle/service/time/time_manager.h2
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp31
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h28
-rw-r--r--src/video_core/shader/decode/memory.cpp1
-rw-r--r--src/video_core/shader/decode/texture.cpp11
26 files changed, 320 insertions, 48 deletions
diff --git a/.ci/scripts/clang/docker.sh b/.ci/scripts/clang/docker.sh
new file mode 100755
index 000000000..885d74e97
--- /dev/null
+++ b/.ci/scripts/clang/docker.sh
@@ -0,0 +1,18 @@
+#!/bin/bash -ex
+
+# Exit on error, rather than continuing with the rest of the script.
+set -e
+
+cd /yuzu
+
+ccache -s
+
+mkdir build || true && cd build
+cmake .. -DDISPLAY_VERSION=$1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/clang -DCMAKE_CXX_COMPILER=/usr/lib/ccache/clang++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON -DENABLE_QT_TRANSLATION=ON -DCMAKE_INSTALL_PREFIX="/usr"
+
+make -j$(nproc)
+
+ccache -s
+
+ctest -VV -C Release
+
diff --git a/.ci/scripts/clang/exec.sh b/.ci/scripts/clang/exec.sh
new file mode 100644
index 000000000..e56cd4325
--- /dev/null
+++ b/.ci/scripts/clang/exec.sh
@@ -0,0 +1,8 @@
+#!/bin/bash -ex
+
+mkdir -p "ccache" || true
+chmod a+x ./.ci/scripts/clang/docker.sh
+# the UID for the container yuzu user is 1027
+sudo chown -R 1027 ./
+docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/clang/docker.sh $1
+sudo chown -R $UID ./
diff --git a/.ci/scripts/clang/upload.sh b/.ci/scripts/clang/upload.sh
new file mode 100644
index 000000000..fe4e6b2ac
--- /dev/null
+++ b/.ci/scripts/clang/upload.sh
@@ -0,0 +1,20 @@
+#!/bin/bash -ex
+
+. .ci/scripts/common/pre-upload.sh
+
+REV_NAME="yuzu-linux-${GITDATE}-${GITREV}"
+ARCHIVE_NAME="${REV_NAME}.tar.xz"
+COMPRESSION_FLAGS="-cJvf"
+
+if [ "${RELEASE_NAME}" = "mainline" ]; then
+ DIR_NAME="${REV_NAME}"
+else
+ DIR_NAME="${REV_NAME}_${RELEASE_NAME}"
+fi
+
+mkdir "$DIR_NAME"
+
+cp build/bin/yuzu-cmd "$DIR_NAME"
+cp build/bin/yuzu "$DIR_NAME"
+
+. .ci/scripts/common/post-upload.sh
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
index 7422c8346..57d36f813 100644
--- a/.ci/templates/build-standard.yml
+++ b/.ci/templates/build-standard.yml
@@ -12,6 +12,9 @@ jobs:
windows:
BuildSuffix: 'windows-mingw'
ScriptFolder: 'windows'
+ clang:
+ BuildSuffix: 'clang'
+ ScriptFolder: 'clang'
linux:
BuildSuffix: 'linux'
ScriptFolder: 'linux'
@@ -24,4 +27,4 @@ jobs:
parameters:
artifactSource: 'false'
cache: $(parameters.cache)
- version: $(parameters.version) \ No newline at end of file
+ version: $(parameters.version)
diff --git a/externals/glad/include/glad/glad.h b/externals/glad/include/glad/glad.h
index 6e16358ea..191bb9fcb 100644
--- a/externals/glad/include/glad/glad.h
+++ b/externals/glad/include/glad/glad.h
@@ -5156,6 +5156,9 @@ GLAPI PFNGLDEPTHRANGEARRAYVPROC glad_glDepthRangeArrayv;
typedef void (APIENTRYP PFNGLDEPTHRANGEINDEXEDPROC)(GLuint index, GLdouble n, GLdouble f);
GLAPI PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed;
#define glDepthRangeIndexed glad_glDepthRangeIndexed
+typedef void (APIENTRYP PFNGLDEPTHRANGEINDEXEDDNVPROC)(GLuint index, GLdouble n, GLdouble f);
+GLAPI PFNGLDEPTHRANGEINDEXEDDNVPROC glad_glDepthRangeIndexeddNV;
+#define glDepthRangeIndexeddNV glad_glDepthRangeIndexeddNV
typedef void (APIENTRYP PFNGLGETFLOATI_VPROC)(GLenum target, GLuint index, GLfloat *data);
GLAPI PFNGLGETFLOATI_VPROC glad_glGetFloati_v;
#define glGetFloati_v glad_glGetFloati_v
diff --git a/externals/glad/src/glad.c b/externals/glad/src/glad.c
index d3e13163f..7b24cd68d 100644
--- a/externals/glad/src/glad.c
+++ b/externals/glad/src/glad.c
@@ -1044,6 +1044,7 @@ PFNGLDEPTHMASKPROC glad_glDepthMask = NULL;
PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL;
PFNGLDEPTHRANGEARRAYVPROC glad_glDepthRangeArrayv = NULL;
PFNGLDEPTHRANGEINDEXEDPROC glad_glDepthRangeIndexed = NULL;
+PFNGLDEPTHRANGEINDEXEDDNVPROC glad_glDepthRangeIndexeddNV = NULL;
PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL;
PFNGLDETACHSHADERPROC glad_glDetachShader = NULL;
PFNGLDISABLEPROC glad_glDisable = NULL;
@@ -7971,6 +7972,7 @@ static void load_GL_NV_depth_buffer_float(GLADloadproc load) {
glad_glDepthRangedNV = (PFNGLDEPTHRANGEDNVPROC)load("glDepthRangedNV");
glad_glClearDepthdNV = (PFNGLCLEARDEPTHDNVPROC)load("glClearDepthdNV");
glad_glDepthBoundsdNV = (PFNGLDEPTHBOUNDSDNVPROC)load("glDepthBoundsdNV");
+ glad_glDepthRangeIndexeddNV = (PFNGLDEPTHRANGEINDEXEDDNVPROC)load("glDepthRangeIndexeddNV");
}
static void load_GL_NV_draw_texture(GLADloadproc load) {
if(!GLAD_GL_NV_draw_texture) return;
diff --git a/src/common/uint128.h b/src/common/uint128.h
index 83560a9ce..4780b2f9d 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -98,4 +98,24 @@ namespace Common {
#endif
}
+// This function divides a u128 by a u32 value and produces two u64 values:
+// the result of division and the remainder
+[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+ u64 remainder = dividend[0] % divisor;
+ u64 accum = dividend[0] / divisor;
+ if (dividend[1] == 0)
+ return {accum, remainder};
+ // We ignore dividend[1] / divisor as that overflows
+ const u64 first_segment = (dividend[1] % divisor) << 32;
+ accum += (first_segment / divisor) << 32;
+ const u64 second_segment = (first_segment % divisor) << 32;
+ accum += (second_segment / divisor);
+ remainder += second_segment % divisor;
+ if (remainder >= divisor) {
+ accum++;
+ remainder -= divisor;
+ }
+ return {accum, remainder};
+}
+
} // namespace Common
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 1545993bd..49830b8ab 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -20,9 +20,7 @@ using base_time_point = std::chrono::time_point<base_timer>;
class StandardWallClock final : public WallClock {
public:
explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
- : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false),
- emulated_clock_factor{GetFixedPoint64Factor(emulated_clock_frequency, 1000000000)},
- emulated_cpu_factor{GetFixedPoint64Factor(emulated_cpu_frequency, 1000000000)} {
+ : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) {
start_time = base_timer::now();
}
@@ -45,11 +43,16 @@ public:
}
u64 GetClockCycles() override {
- return MultiplyHigh(GetTimeNS().count(), emulated_clock_factor);
+ std::chrono::nanoseconds time_now = GetTimeNS();
+ const u128 temporary =
+ Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
+ return Common::Divide128On32(temporary, 1000000000).first;
}
u64 GetCPUCycles() override {
- return MultiplyHigh(GetTimeNS().count(), emulated_cpu_factor);
+ std::chrono::nanoseconds time_now = GetTimeNS();
+ const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
+ return Common::Divide128On32(temporary, 1000000000).first;
}
void Pause([[maybe_unused]] bool is_paused) override {
@@ -58,8 +61,6 @@ public:
private:
base_time_point start_time;
- const u64 emulated_clock_factor;
- const u64 emulated_cpu_factor;
};
#ifdef ARCHITECTURE_x86_64
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 30f5e1128..de6305e2a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -308,6 +308,9 @@ struct System::Impl {
// Close all CPU/threading state
cpu_manager.Shutdown();
+ // Release the Time Manager's resources
+ time_manager.Shutdown();
+
// Shutdown kernel and core timing
core_timing.Shutdown();
kernel.Shutdown();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 453695545..331cf3a60 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -101,8 +101,6 @@ struct KernelCore::Impl {
current_process = nullptr;
- system_resource_limit = nullptr;
-
global_handle_table.Clear();
preemption_event = nullptr;
@@ -111,6 +109,13 @@ struct KernelCore::Impl {
exclusive_monitor.reset();
+ hid_shared_mem = nullptr;
+ font_shared_mem = nullptr;
+ irs_shared_mem = nullptr;
+ time_shared_mem = nullptr;
+
+ system_resource_limit = nullptr;
+
// Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others
next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
}
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 3ec0e1eca..615e20a54 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -508,7 +508,7 @@ public:
{1, &IManagerForApplication::GetAccountId, "GetAccountId"},
{2, nullptr, "EnsureIdTokenCacheAsync"},
{3, nullptr, "LoadIdTokenCache"},
- {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"},
+ {130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"},
{150, nullptr, "CreateAuthorizationRequest"},
{160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"},
{170, nullptr, "LoadNetworkServiceLicenseKindAsync"},
@@ -534,6 +534,22 @@ private:
rb.PushRaw<u64>(user_id.GetNintendoID());
}
+ void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+ std::vector<u8> nas_user_base_for_application(0x68);
+ ctx.WriteBuffer(nas_user_base_for_application, 0);
+
+ if (ctx.CanWriteBuffer(1)) {
+ std::vector<u8> unknown_out_buffer(ctx.GetWriteBufferSize(1));
+ ctx.WriteBuffer(unknown_out_buffer, 1);
+ }
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushRaw<u64>(user_id.GetNintendoID());
+ }
+
void StoreOpenContext(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index ffc3dfdc3..ba27bbb05 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -273,8 +273,8 @@ Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} {
{204, &Hid::PermitVibration, "PermitVibration"},
{205, &Hid::IsVibrationPermitted, "IsVibrationPermitted"},
{206, &Hid::SendVibrationValues, "SendVibrationValues"},
- {207, nullptr, "SendVibrationGcErmCommand"},
- {208, nullptr, "GetActualVibrationGcErmCommand"},
+ {207, &Hid::SendVibrationGcErmCommand, "SendVibrationGcErmCommand"},
+ {208, &Hid::GetActualVibrationGcErmCommand, "GetActualVibrationGcErmCommand"},
{209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"},
{210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"},
{211, &Hid::IsVibrationDeviceMounted, "IsVibrationDeviceMounted"},
@@ -1093,7 +1093,22 @@ void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) {
VibrationDeviceInfo vibration_device_info;
- vibration_device_info.type = VibrationDeviceType::LinearResonantActuator;
+ switch (vibration_device_handle.npad_type) {
+ case Controller_NPad::NpadType::ProController:
+ case Controller_NPad::NpadType::Handheld:
+ case Controller_NPad::NpadType::JoyconDual:
+ case Controller_NPad::NpadType::JoyconLeft:
+ case Controller_NPad::NpadType::JoyconRight:
+ default:
+ vibration_device_info.type = VibrationDeviceType::LinearResonantActuator;
+ break;
+ case Controller_NPad::NpadType::GameCube:
+ vibration_device_info.type = VibrationDeviceType::GcErm;
+ break;
+ case Controller_NPad::NpadType::Pokeball:
+ vibration_device_info.type = VibrationDeviceType::Unknown;
+ break;
+ }
switch (vibration_device_handle.device_index) {
case Controller_NPad::DeviceIndex::Left:
@@ -1215,6 +1230,108 @@ void Hid::SendVibrationValues(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
+void Hid::SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ struct Parameters {
+ Controller_NPad::DeviceHandle vibration_device_handle;
+ u64 applet_resource_user_id;
+ VibrationGcErmCommand gc_erm_command;
+ };
+ static_assert(sizeof(Parameters) == 0x18, "Parameters has incorrect size.");
+
+ const auto parameters{rp.PopRaw<Parameters>()};
+
+ /**
+ * Note: This uses yuzu-specific behavior such that the StopHard command produces
+ * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined below,
+ * in order to differentiate between Stop and StopHard commands.
+ * This is done to reuse the controller vibration functions made for regular controllers.
+ */
+ const auto vibration_value = [parameters] {
+ switch (parameters.gc_erm_command) {
+ case VibrationGcErmCommand::Stop:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 0.0f,
+ .freq_low = 160.0f,
+ .amp_high = 0.0f,
+ .freq_high = 320.0f,
+ };
+ case VibrationGcErmCommand::Start:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 1.0f,
+ .freq_low = 160.0f,
+ .amp_high = 1.0f,
+ .freq_high = 320.0f,
+ };
+ case VibrationGcErmCommand::StopHard:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 0.0f,
+ .freq_low = 0.0f,
+ .amp_high = 0.0f,
+ .freq_high = 0.0f,
+ };
+ default:
+ return Controller_NPad::DEFAULT_VIBRATION_VALUE;
+ }
+ }();
+
+ applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .VibrateController(parameters.vibration_device_handle, vibration_value);
+
+ LOG_DEBUG(Service_HID,
+ "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}, "
+ "gc_erm_command={}",
+ parameters.vibration_device_handle.npad_type,
+ parameters.vibration_device_handle.npad_id,
+ parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id,
+ parameters.gc_erm_command);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void Hid::GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ struct Parameters {
+ Controller_NPad::DeviceHandle vibration_device_handle;
+ INSERT_PADDING_WORDS_NOINIT(1);
+ u64 applet_resource_user_id;
+ };
+
+ const auto parameters{rp.PopRaw<Parameters>()};
+
+ const auto last_vibration = applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .GetLastVibration(parameters.vibration_device_handle);
+
+ const auto gc_erm_command = [last_vibration] {
+ if (last_vibration.amp_low != 0.0f || last_vibration.amp_high != 0.0f) {
+ return VibrationGcErmCommand::Start;
+ }
+
+ /**
+ * Note: This uses yuzu-specific behavior such that the StopHard command produces
+ * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined in the HID function
+ * SendVibrationGcErmCommand, in order to differentiate between Stop and StopHard commands.
+ * This is done to reuse the controller vibration functions made for regular controllers.
+ */
+ if (last_vibration.freq_low == 0.0f && last_vibration.freq_high == 0.0f) {
+ return VibrationGcErmCommand::StopHard;
+ }
+
+ return VibrationGcErmCommand::Stop;
+ }();
+
+ LOG_DEBUG(Service_HID,
+ "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}",
+ parameters.vibration_device_handle.npad_type,
+ parameters.vibration_device_handle.npad_id,
+ parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushEnum(gc_erm_command);
+}
+
void Hid::BeginPermitVibrationSession(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 06ddcf3e4..36ed228c8 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -136,6 +136,8 @@ private:
void PermitVibration(Kernel::HLERequestContext& ctx);
void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
void SendVibrationValues(Kernel::HLERequestContext& ctx);
+ void SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx);
+ void GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx);
void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx);
void EndPermitVibrationSession(Kernel::HLERequestContext& ctx);
void IsVibrationDeviceMounted(Kernel::HLERequestContext& ctx);
@@ -154,7 +156,9 @@ private:
void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
enum class VibrationDeviceType : u32 {
+ Unknown = 0,
LinearResonantActuator = 1,
+ GcErm = 2,
};
enum class VibrationDevicePosition : u32 {
@@ -163,6 +167,12 @@ private:
Right = 2,
};
+ enum class VibrationGcErmCommand : u64 {
+ Stop = 0,
+ Start = 1,
+ StopHard = 2,
+ };
+
struct VibrationDeviceInfo {
VibrationDeviceType type{};
VibrationDevicePosition position{};
diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp
index 858623e2b..1f7309f6b 100644
--- a/src/core/hle/service/time/time_manager.cpp
+++ b/src/core/hle/service/time/time_manager.cpp
@@ -279,6 +279,10 @@ const SharedMemory& TimeManager::GetSharedMemory() const {
return impl->shared_memory;
}
+void TimeManager::Shutdown() {
+ impl.reset();
+}
+
void TimeManager::UpdateLocalSystemClockTime(s64 posix_time) {
impl->UpdateLocalSystemClockTime(system, posix_time);
}
diff --git a/src/core/hle/service/time/time_manager.h b/src/core/hle/service/time/time_manager.h
index 993c7c288..4db8cc0e1 100644
--- a/src/core/hle/service/time/time_manager.h
+++ b/src/core/hle/service/time/time_manager.h
@@ -61,6 +61,8 @@ public:
const SharedMemory& GetSharedMemory() const;
+ void Shutdown();
+
void SetupTimeZoneManager(std::string location_name,
Clock::SteadyClockTimePoint time_zone_updated_time_point,
std::size_t total_location_name_count, u128 time_zone_rule_version,
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 48d5c4a5e..1ae5f1d62 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -239,6 +239,7 @@ Device::Device() {
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
+ has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -275,6 +276,7 @@ Device::Device(std::nullptr_t) {
has_image_load_formatted = true;
has_texture_shadow_lod = true;
has_variable_aoffi = true;
+ has_depth_buffer_float = true;
}
bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee053776d..f24bd0c7b 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -122,6 +122,10 @@ public:
return use_driver_cache;
}
+ bool HasDepthBufferFloat() const {
+ return has_depth_buffer_float;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -150,6 +154,7 @@ private:
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
+ bool has_depth_buffer_float{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 418644108..4610fd160 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -889,7 +889,11 @@ void RasterizerOpenGL::SyncViewport() {
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
- glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ if (device.HasDepthBufferFloat()) {
+ glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
+ } else {
+ glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ }
if (!GLAD_GL_NV_viewport_swizzle) {
continue;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 529570ff0..5cf7cd151 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -335,6 +335,10 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
const VideoCore::DiskResourceLoadCallback& callback) {
disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
+
+ LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
+ transferable.has_value() ? transferable->size() : 0);
+
if (!transferable) {
return;
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 848eedd66..668633e7b 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -201,10 +201,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
});
}
-void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
- update_descriptor_queue.AddBuffer(buffer, offset, size);
-}
-
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
if (num_indices <= current_num_indices) {
return;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 041e6515c..982e92191 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,6 +8,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -16,7 +17,6 @@ namespace Vulkan {
class Device;
class VKDescriptorPool;
class VKScheduler;
-class VKUpdateDescriptorQueue;
class BufferCacheRuntime;
@@ -86,7 +86,9 @@ public:
}
private:
- void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
+ void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ update_descriptor_queue.AddBuffer(buffer, offset, size);
+ }
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 40e2e0d38..c6846d886 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1845,13 +1845,21 @@ private:
Expression TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
const Id coords = GetCoordinates(operation, Type::Float);
+
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id texture{};
+
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
+ }
+
if (meta.sampler.is_shadow) {
texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
- AsFloat(Visit(meta.depth_compare)));
+ AsFloat(Visit(meta.depth_compare)), mask, operands);
} else {
u32 component_value = 0;
if (meta.component) {
@@ -1860,7 +1868,7 @@ private:
component_value = component->GetValue();
}
texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
- Constant(t_uint, component_value));
+ Constant(t_uint, component_value), mask, operands);
}
return GetTextureElement(operation, texture, Type::Float);
}
@@ -1928,13 +1936,22 @@ private:
const Id image = GetTextureImage(operation);
const Id coords = GetCoordinates(operation, Type::Int);
+
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id fetch;
+
if (meta.lod && !meta.sampler.is_buffer) {
- fetch = OpImageFetch(t_float4, image, coords, spv::ImageOperandsMask::Lod,
- AsInt(Visit(meta.lod)));
- } else {
- fetch = OpImageFetch(t_float4, image, coords);
+ mask = mask | spv::ImageOperandsMask::Lod;
+ operands.push_back(AsInt(Visit(meta.lod)));
+ }
+
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
}
+
+ fetch = OpImageFetch(t_float4, image, coords, mask, operands);
return GetTextureElement(operation, fetch, Type::Float);
}
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index f99273c6a..dc45fdcb1 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -20,20 +20,20 @@ VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKSchedu
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
- payload.clear();
+ payload_cursor = payload.data();
}
void VKUpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call migth use.
- static constexpr std::size_t MIN_ENTRIES = 0x400;
+ static constexpr size_t MIN_ENTRIES = 0x400;
- if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
+ if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
- payload.clear();
+ payload_cursor = payload.data();
}
- upload_start = &*payload.end();
+ upload_start = payload_cursor;
}
void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index e214f7195..d35e77c44 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -4,8 +4,7 @@
#pragma once
-#include <variant>
-#include <boost/container/static_vector.hpp>
+#include <array>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -16,13 +15,15 @@ class Device;
class VKScheduler;
struct DescriptorUpdateEntry {
- DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
+ struct Empty {};
+ DescriptorUpdateEntry() = default;
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
-
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
union {
+ Empty empty{};
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
VkBufferView texel_buffer;
@@ -41,39 +42,40 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
void AddImage(VkImageView image_view) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
- void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
- payload.emplace_back(VkDescriptorBufferInfo{
+ void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
+ *(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
- });
+ };
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- payload.emplace_back(texel_buffer);
+ *(payload_cursor++) = texel_buffer;
}
private:
const Device& device;
VKScheduler& scheduler;
+ DescriptorUpdateEntry* payload_cursor = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
- boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
+ std::array<DescriptorUpdateEntry, 0x10000> payload;
};
} // namespace Vulkan
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 50f4e7d35..7728f600e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -330,6 +330,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
+ case StoreType::Unsigned16:
case StoreType::Signed16: {
Node address = GetAddress(0);
Node memory = (this->*get_memory)(address);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 833fa2a39..c69681e8d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -806,6 +806,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+ const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
// If enabled arrays index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
@@ -820,17 +821,23 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
- coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+ coords.push_back(
+ GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
+ std::vector<Node> aoffi;
+ if (aoffi_enabled) {
+ aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
+ }
+
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
+ MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;