summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/uint128.h20
-rw-r--r--src/common/wall_clock.cpp15
-rw-r--r--src/core/core.cpp3
-rw-r--r--src/core/hle/kernel/kernel.cpp9
-rw-r--r--src/core/hle/service/acc/acc.cpp18
-rw-r--r--src/core/hle/service/hid/hid.cpp123
-rw-r--r--src/core/hle/service/hid/hid.h10
-rw-r--r--src/core/hle/service/time/time_manager.cpp4
-rw-r--r--src/core/hle/service/time/time_manager.h2
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp31
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h28
-rw-r--r--src/video_core/shader/decode/memory.cpp1
-rw-r--r--src/video_core/shader/decode/texture.cpp11
20 files changed, 265 insertions, 47 deletions
diff --git a/src/common/uint128.h b/src/common/uint128.h
index 83560a9ce..4780b2f9d 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -98,4 +98,24 @@ namespace Common {
#endif
}
+// Divides a u128 by a u32 and returns {quotient, remainder} as a pair of u64 values.
+// Only the low 64 bits of the quotient are returned; the remainder is exact.
+[[nodiscard]] static inline std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
+ u64 remainder = dividend[0] % divisor;
+ u64 accum = dividend[0] / divisor;
+ if (dividend[1] == 0)
+ return {accum, remainder};
+ // The quotient of dividend[1] / divisor is deliberately dropped: it lies above bit 63 of the result and would overflow a u64
+ const u64 first_segment = (dividend[1] % divisor) << 32;
+ accum += (first_segment / divisor) << 32;
+ const u64 second_segment = (first_segment % divisor) << 32;
+ accum += (second_segment / divisor);
+ remainder += second_segment % divisor;
+ if (remainder >= divisor) {
+ accum++;
+ remainder -= divisor;
+ }
+ return {accum, remainder};
+}
+
} // namespace Common
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index 1545993bd..49830b8ab 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -20,9 +20,7 @@ using base_time_point = std::chrono::time_point<base_timer>;
class StandardWallClock final : public WallClock {
public:
explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_)
- : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false),
- emulated_clock_factor{GetFixedPoint64Factor(emulated_clock_frequency, 1000000000)},
- emulated_cpu_factor{GetFixedPoint64Factor(emulated_cpu_frequency, 1000000000)} {
+ : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, false) {
start_time = base_timer::now();
}
@@ -45,11 +43,16 @@ public:
}
u64 GetClockCycles() override {
- return MultiplyHigh(GetTimeNS().count(), emulated_clock_factor);
+ std::chrono::nanoseconds time_now = GetTimeNS();
+ const u128 temporary =
+ Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
+ return Common::Divide128On32(temporary, 1000000000).first;
}
u64 GetCPUCycles() override {
- return MultiplyHigh(GetTimeNS().count(), emulated_cpu_factor);
+ std::chrono::nanoseconds time_now = GetTimeNS();
+ const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
+ return Common::Divide128On32(temporary, 1000000000).first;
}
void Pause([[maybe_unused]] bool is_paused) override {
@@ -58,8 +61,6 @@ public:
private:
base_time_point start_time;
- const u64 emulated_clock_factor;
- const u64 emulated_cpu_factor;
};
#ifdef ARCHITECTURE_x86_64
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 30f5e1128..de6305e2a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -308,6 +308,9 @@ struct System::Impl {
// Close all CPU/threading state
cpu_manager.Shutdown();
+ // Release the Time Manager's resources
+ time_manager.Shutdown();
+
// Shutdown kernel and core timing
core_timing.Shutdown();
kernel.Shutdown();
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 453695545..331cf3a60 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -101,8 +101,6 @@ struct KernelCore::Impl {
current_process = nullptr;
- system_resource_limit = nullptr;
-
global_handle_table.Clear();
preemption_event = nullptr;
@@ -111,6 +109,13 @@ struct KernelCore::Impl {
exclusive_monitor.reset();
+ hid_shared_mem = nullptr;
+ font_shared_mem = nullptr;
+ irs_shared_mem = nullptr;
+ time_shared_mem = nullptr;
+
+ system_resource_limit = nullptr;
+
// Next host thread ID to use, 0-3 IDs represent core threads, >3 represent others
next_host_thread_id = Core::Hardware::NUM_CPU_CORES;
}
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 3ec0e1eca..615e20a54 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -508,7 +508,7 @@ public:
{1, &IManagerForApplication::GetAccountId, "GetAccountId"},
{2, nullptr, "EnsureIdTokenCacheAsync"},
{3, nullptr, "LoadIdTokenCache"},
- {130, nullptr, "GetNintendoAccountUserResourceCacheForApplication"},
+ {130, &IManagerForApplication::GetNintendoAccountUserResourceCacheForApplication, "GetNintendoAccountUserResourceCacheForApplication"},
{150, nullptr, "CreateAuthorizationRequest"},
{160, &IManagerForApplication::StoreOpenContext, "StoreOpenContext"},
{170, nullptr, "LoadNetworkServiceLicenseKindAsync"},
@@ -534,6 +534,22 @@ private:
rb.PushRaw<u64>(user_id.GetNintendoID());
}
+ void GetNintendoAccountUserResourceCacheForApplication(Kernel::HLERequestContext& ctx) {
+ LOG_WARNING(Service_ACC, "(STUBBED) called");
+
+ std::vector<u8> nas_user_base_for_application(0x68);
+ ctx.WriteBuffer(nas_user_base_for_application, 0);
+
+ if (ctx.CanWriteBuffer(1)) {
+ std::vector<u8> unknown_out_buffer(ctx.GetWriteBufferSize(1));
+ ctx.WriteBuffer(unknown_out_buffer, 1);
+ }
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushRaw<u64>(user_id.GetNintendoID());
+ }
+
void StoreOpenContext(Kernel::HLERequestContext& ctx) {
LOG_WARNING(Service_ACC, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index ffc3dfdc3..ba27bbb05 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -273,8 +273,8 @@ Hid::Hid(Core::System& system_) : ServiceFramework{system_, "hid"} {
{204, &Hid::PermitVibration, "PermitVibration"},
{205, &Hid::IsVibrationPermitted, "IsVibrationPermitted"},
{206, &Hid::SendVibrationValues, "SendVibrationValues"},
- {207, nullptr, "SendVibrationGcErmCommand"},
- {208, nullptr, "GetActualVibrationGcErmCommand"},
+ {207, &Hid::SendVibrationGcErmCommand, "SendVibrationGcErmCommand"},
+ {208, &Hid::GetActualVibrationGcErmCommand, "GetActualVibrationGcErmCommand"},
{209, &Hid::BeginPermitVibrationSession, "BeginPermitVibrationSession"},
{210, &Hid::EndPermitVibrationSession, "EndPermitVibrationSession"},
{211, &Hid::IsVibrationDeviceMounted, "IsVibrationDeviceMounted"},
@@ -1093,7 +1093,22 @@ void Hid::GetVibrationDeviceInfo(Kernel::HLERequestContext& ctx) {
VibrationDeviceInfo vibration_device_info;
- vibration_device_info.type = VibrationDeviceType::LinearResonantActuator;
+ switch (vibration_device_handle.npad_type) {
+ case Controller_NPad::NpadType::ProController:
+ case Controller_NPad::NpadType::Handheld:
+ case Controller_NPad::NpadType::JoyconDual:
+ case Controller_NPad::NpadType::JoyconLeft:
+ case Controller_NPad::NpadType::JoyconRight:
+ default:
+ vibration_device_info.type = VibrationDeviceType::LinearResonantActuator;
+ break;
+ case Controller_NPad::NpadType::GameCube:
+ vibration_device_info.type = VibrationDeviceType::GcErm;
+ break;
+ case Controller_NPad::NpadType::Pokeball:
+ vibration_device_info.type = VibrationDeviceType::Unknown;
+ break;
+ }
switch (vibration_device_handle.device_index) {
case Controller_NPad::DeviceIndex::Left:
@@ -1215,6 +1230,108 @@ void Hid::SendVibrationValues(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
+void Hid::SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ struct Parameters {
+ Controller_NPad::DeviceHandle vibration_device_handle;
+ u64 applet_resource_user_id;
+ VibrationGcErmCommand gc_erm_command;
+ };
+ static_assert(sizeof(Parameters) == 0x18, "Parameters has incorrect size.");
+
+ const auto parameters{rp.PopRaw<Parameters>()};
+
+ /**
+ * Note: This uses yuzu-specific behavior such that the StopHard command produces
+ * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined below,
+ * in order to differentiate between Stop and StopHard commands.
+ * This is done to reuse the controller vibration functions made for regular controllers.
+ */
+ const auto vibration_value = [parameters] {
+ switch (parameters.gc_erm_command) {
+ case VibrationGcErmCommand::Stop:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 0.0f,
+ .freq_low = 160.0f,
+ .amp_high = 0.0f,
+ .freq_high = 320.0f,
+ };
+ case VibrationGcErmCommand::Start:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 1.0f,
+ .freq_low = 160.0f,
+ .amp_high = 1.0f,
+ .freq_high = 320.0f,
+ };
+ case VibrationGcErmCommand::StopHard:
+ return Controller_NPad::VibrationValue{
+ .amp_low = 0.0f,
+ .freq_low = 0.0f,
+ .amp_high = 0.0f,
+ .freq_high = 0.0f,
+ };
+ default:
+ return Controller_NPad::DEFAULT_VIBRATION_VALUE;
+ }
+ }();
+
+ applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .VibrateController(parameters.vibration_device_handle, vibration_value);
+
+ LOG_DEBUG(Service_HID,
+ "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}, "
+ "gc_erm_command={}",
+ parameters.vibration_device_handle.npad_type,
+ parameters.vibration_device_handle.npad_id,
+ parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id,
+ parameters.gc_erm_command);
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void Hid::GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ struct Parameters {
+ Controller_NPad::DeviceHandle vibration_device_handle;
+ INSERT_PADDING_WORDS_NOINIT(1);
+ u64 applet_resource_user_id;
+ };
+
+ const auto parameters{rp.PopRaw<Parameters>()};
+
+ const auto last_vibration = applet_resource->GetController<Controller_NPad>(HidController::NPad)
+ .GetLastVibration(parameters.vibration_device_handle);
+
+ const auto gc_erm_command = [last_vibration] {
+ if (last_vibration.amp_low != 0.0f || last_vibration.amp_high != 0.0f) {
+ return VibrationGcErmCommand::Start;
+ }
+
+ /**
+ * Note: This uses yuzu-specific behavior such that the StopHard command produces
+ * vibrations where freq_low == 0.0f and freq_high == 0.0f, as defined in the HID function
+ * SendVibrationGcErmCommand, in order to differentiate between Stop and StopHard commands.
+ * This is done to reuse the controller vibration functions made for regular controllers.
+ */
+ if (last_vibration.freq_low == 0.0f && last_vibration.freq_high == 0.0f) {
+ return VibrationGcErmCommand::StopHard;
+ }
+
+ return VibrationGcErmCommand::Stop;
+ }();
+
+ LOG_DEBUG(Service_HID,
+ "called, npad_type={}, npad_id={}, device_index={}, applet_resource_user_id={}",
+ parameters.vibration_device_handle.npad_type,
+ parameters.vibration_device_handle.npad_id,
+ parameters.vibration_device_handle.device_index, parameters.applet_resource_user_id);
+
+ IPC::ResponseBuilder rb{ctx, 4};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushEnum(gc_erm_command);
+}
+
void Hid::BeginPermitVibrationSession(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
const auto applet_resource_user_id{rp.Pop<u64>()};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 06ddcf3e4..36ed228c8 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -136,6 +136,8 @@ private:
void PermitVibration(Kernel::HLERequestContext& ctx);
void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
void SendVibrationValues(Kernel::HLERequestContext& ctx);
+ void SendVibrationGcErmCommand(Kernel::HLERequestContext& ctx);
+ void GetActualVibrationGcErmCommand(Kernel::HLERequestContext& ctx);
void BeginPermitVibrationSession(Kernel::HLERequestContext& ctx);
void EndPermitVibrationSession(Kernel::HLERequestContext& ctx);
void IsVibrationDeviceMounted(Kernel::HLERequestContext& ctx);
@@ -154,7 +156,9 @@ private:
void GetNpadCommunicationMode(Kernel::HLERequestContext& ctx);
enum class VibrationDeviceType : u32 {
+ Unknown = 0,
LinearResonantActuator = 1,
+ GcErm = 2,
};
enum class VibrationDevicePosition : u32 {
@@ -163,6 +167,12 @@ private:
Right = 2,
};
+ enum class VibrationGcErmCommand : u64 {
+ Stop = 0,
+ Start = 1,
+ StopHard = 2,
+ };
+
struct VibrationDeviceInfo {
VibrationDeviceType type{};
VibrationDevicePosition position{};
diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp
index 858623e2b..1f7309f6b 100644
--- a/src/core/hle/service/time/time_manager.cpp
+++ b/src/core/hle/service/time/time_manager.cpp
@@ -279,6 +279,10 @@ const SharedMemory& TimeManager::GetSharedMemory() const {
return impl->shared_memory;
}
+void TimeManager::Shutdown() {
+ impl.reset();
+}
+
void TimeManager::UpdateLocalSystemClockTime(s64 posix_time) {
impl->UpdateLocalSystemClockTime(system, posix_time);
}
diff --git a/src/core/hle/service/time/time_manager.h b/src/core/hle/service/time/time_manager.h
index 993c7c288..4db8cc0e1 100644
--- a/src/core/hle/service/time/time_manager.h
+++ b/src/core/hle/service/time/time_manager.h
@@ -61,6 +61,8 @@ public:
const SharedMemory& GetSharedMemory() const;
+ void Shutdown();
+
void SetupTimeZoneManager(std::string location_name,
Clock::SteadyClockTimePoint time_zone_updated_time_point,
std::size_t total_location_name_count, u128 time_zone_rule_version,
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 48d5c4a5e..1ae5f1d62 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -239,6 +239,7 @@ Device::Device() {
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
has_debugging_tool_attached = IsDebugToolAttached(extensions);
+ has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
@@ -275,6 +276,7 @@ Device::Device(std::nullptr_t) {
has_image_load_formatted = true;
has_texture_shadow_lod = true;
has_variable_aoffi = true;
+ has_depth_buffer_float = true;
}
bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee053776d..f24bd0c7b 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -122,6 +122,10 @@ public:
return use_driver_cache;
}
+ bool HasDepthBufferFloat() const {
+ return has_depth_buffer_float;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
@@ -150,6 +154,7 @@ private:
bool use_assembly_shaders{};
bool use_asynchronous_shaders{};
bool use_driver_cache{};
+ bool has_depth_buffer_float{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 418644108..4610fd160 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -889,7 +889,11 @@ void RasterizerOpenGL::SyncViewport() {
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
- glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ if (device.HasDepthBufferFloat()) {
+ glDepthRangeIndexeddNV(static_cast<GLuint>(i), near_depth, far_depth);
+ } else {
+ glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ }
if (!GLAD_GL_NV_viewport_swizzle) {
continue;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 529570ff0..5cf7cd151 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -335,6 +335,10 @@ void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop
const VideoCore::DiskResourceLoadCallback& callback) {
disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
+
+ LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
+ transferable.has_value() ? transferable->size() : 0);
+
if (!transferable) {
return;
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 848eedd66..668633e7b 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -201,10 +201,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer,
});
}
-void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
- update_descriptor_queue.AddBuffer(buffer, offset, size);
-}
-
void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) {
if (num_indices <= current_num_indices) {
return;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 041e6515c..982e92191 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,6 +8,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -16,7 +17,6 @@ namespace Vulkan {
class Device;
class VKDescriptorPool;
class VKScheduler;
-class VKUpdateDescriptorQueue;
class BufferCacheRuntime;
@@ -86,7 +86,9 @@ public:
}
private:
- void BindBuffer(VkBuffer buffer, u32 offset, u32 size);
+ void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
+ update_descriptor_queue.AddBuffer(buffer, offset, size);
+ }
void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 40e2e0d38..c6846d886 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1845,13 +1845,21 @@ private:
Expression TextureGather(Operation operation) {
const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- UNIMPLEMENTED_IF(!meta.aoffi.empty());
const Id coords = GetCoordinates(operation, Type::Float);
+
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id texture{};
+
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
+ }
+
if (meta.sampler.is_shadow) {
texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
- AsFloat(Visit(meta.depth_compare)));
+ AsFloat(Visit(meta.depth_compare)), mask, operands);
} else {
u32 component_value = 0;
if (meta.component) {
@@ -1860,7 +1868,7 @@ private:
component_value = component->GetValue();
}
texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
- Constant(t_uint, component_value));
+ Constant(t_uint, component_value), mask, operands);
}
return GetTextureElement(operation, texture, Type::Float);
}
@@ -1928,13 +1936,22 @@ private:
const Id image = GetTextureImage(operation);
const Id coords = GetCoordinates(operation, Type::Int);
+
+ spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
+ std::vector<Id> operands;
Id fetch;
+
if (meta.lod && !meta.sampler.is_buffer) {
- fetch = OpImageFetch(t_float4, image, coords, spv::ImageOperandsMask::Lod,
- AsInt(Visit(meta.lod)));
- } else {
- fetch = OpImageFetch(t_float4, image, coords);
+ mask = mask | spv::ImageOperandsMask::Lod;
+ operands.push_back(AsInt(Visit(meta.lod)));
+ }
+
+ if (!meta.aoffi.empty()) {
+ mask = mask | spv::ImageOperandsMask::Offset;
+ operands.push_back(GetOffsetCoordinates(operation));
}
+
+ fetch = OpImageFetch(t_float4, image, coords, mask, operands);
return GetTextureElement(operation, fetch, Type::Float);
}
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index f99273c6a..dc45fdcb1 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -20,20 +20,20 @@ VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKSchedu
VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
void VKUpdateDescriptorQueue::TickFrame() {
- payload.clear();
+ payload_cursor = payload.data();
}
void VKUpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call might use.
- static constexpr std::size_t MIN_ENTRIES = 0x400;
+ static constexpr size_t MIN_ENTRIES = 0x400;
- if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
+ if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
scheduler.WaitWorker();
- payload.clear();
+ payload_cursor = payload.data();
}
- upload_start = &*payload.end();
+ upload_start = payload_cursor;
}
void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index e214f7195..d35e77c44 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -4,8 +4,7 @@
#pragma once
-#include <variant>
-#include <boost/container/static_vector.hpp>
+#include <array>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -16,13 +15,15 @@ class Device;
class VKScheduler;
struct DescriptorUpdateEntry {
- DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
+ struct Empty {};
+ DescriptorUpdateEntry() = default;
+ DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
-
DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
union {
+ Empty empty{};
VkDescriptorImageInfo image;
VkDescriptorBufferInfo buffer;
VkBufferView texel_buffer;
@@ -41,39 +42,40 @@ public:
void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
void AddImage(VkImageView image_view) {
- payload.emplace_back(VkDescriptorImageInfo{
+ *(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- });
+ };
}
- void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
- payload.emplace_back(VkDescriptorBufferInfo{
+ void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
+ *(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
- });
+ };
}
void AddTexelBuffer(VkBufferView texel_buffer) {
- payload.emplace_back(texel_buffer);
+ *(payload_cursor++) = texel_buffer;
}
private:
const Device& device;
VKScheduler& scheduler;
+ DescriptorUpdateEntry* payload_cursor = nullptr;
const DescriptorUpdateEntry* upload_start = nullptr;
- boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
+ std::array<DescriptorUpdateEntry, 0x10000> payload;
};
} // namespace Vulkan
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 50f4e7d35..7728f600e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -330,6 +330,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
+ case StoreType::Unsigned16:
case StoreType::Signed16: {
Node address = GetAddress(0);
Node memory = (this->*get_memory)(address);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 833fa2a39..c69681e8d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -806,6 +806,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
const std::size_t type_coord_count = GetCoordCount(texture_type);
const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
+ const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
// If enabled, the array index is always stored in the gpr8 field
const u64 array_register = instr.gpr8.Value();
@@ -820,17 +821,23 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
std::vector<Node> coords;
for (std::size_t i = 0; i < type_coord_count; ++i) {
const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
- coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
+ coords.push_back(
+ GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
}
const Node array = is_array ? GetRegister(array_register) : nullptr;
// When lod is used, it is always in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
+ std::vector<Node> aoffi;
+ if (aoffi_enabled) {
+ aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
+ }
+
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{*sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
+ MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;