21 files changed, 755 insertions, 324 deletions
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index c23106299..5a5851f66 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1342,7 +1342,7 @@ static void ExitProcess(Core::System& system) { /// Creates a new thread static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr entry_point, u64 arg, VAddr stack_top, u32 priority, s32 processor_id) { - LOG_TRACE(Kernel_SVC, + LOG_DEBUG(Kernel_SVC, "called entrypoint=0x{:08X}, arg=0x{:08X}, stacktop=0x{:08X}, " "threadpriority=0x{:08X}, processorid=0x{:08X} : created handle=0x{:08X}", entry_point, arg, stack_top, priority, processor_id, *out_handle); @@ -1402,7 +1402,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e /// Starts the thread for the provided handle static ResultCode StartThread(Core::System& system, Handle thread_handle) { - LOG_TRACE(Kernel_SVC, "called thread=0x{:08X}", thread_handle); + LOG_DEBUG(Kernel_SVC, "called thread=0x{:08X}", thread_handle); const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); @@ -1425,7 +1425,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) { /// Called when a thread exits static void ExitThread(Core::System& system) { - LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC()); + LOG_DEBUG(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC()); auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); current_thread->Stop(); @@ -1435,7 +1435,7 @@ static void ExitThread(Core::System& system) { /// Sleep the current thread static void SleepThread(Core::System& system, s64 nanoseconds) { - LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds); + LOG_DEBUG(Kernel_SVC, "called nanoseconds={}", nanoseconds); enum class SleepType : s64 { YieldWithoutLoadBalancing = 0, @@ -1880,52 +1880,59 @@ static ResultCode GetThreadCoreMask(Core::System& system, Handle thread_handle, } static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, u32 core, - u64 mask) { - LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:016X}, core=0x{:X}", thread_handle, - mask, core); + u64 affinity_mask) { + LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, core=0x{:X}, affinity_mask=0x{:016X}", + thread_handle, core, affinity_mask); - const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); - const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); - if (!thread) { - LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", - thread_handle); - return ERR_INVALID_HANDLE; - } + const auto* const current_process = system.Kernel().CurrentProcess(); if (core == static_cast<u32>(THREADPROCESSORID_IDEAL)) { - const u8 ideal_cpu_core = thread->GetOwnerProcess()->GetIdealCore(); + const u8 ideal_cpu_core = current_process->GetIdealCore(); ASSERT(ideal_cpu_core != static_cast<u8>(THREADPROCESSORID_IDEAL)); // Set the target CPU to the ideal core specified by the process. 
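// The affinity mask is then rebuilt below so that only that ideal core is allowed.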
core = ideal_cpu_core; - mask = 1ULL << core; - } - - if (mask == 0) { - LOG_ERROR(Kernel_SVC, "Mask is 0"); - return ERR_INVALID_COMBINATION; - } + affinity_mask = 1ULL << core; + } else { + const u64 core_mask = current_process->GetCoreMask(); + + if ((core_mask | affinity_mask) != core_mask) { + LOG_ERROR( + Kernel_SVC, + "Invalid processor ID specified (core_mask=0x{:08X}, affinity_mask=0x{:016X})", + core_mask, affinity_mask); + return ERR_INVALID_PROCESSOR_ID; + } - /// This value is used to only change the affinity mask without changing the current ideal core. - static constexpr u32 OnlyChangeMask = static_cast<u32>(-3); + if (affinity_mask == 0) { + LOG_ERROR(Kernel_SVC, "Specified affinity mask is zero."); + return ERR_INVALID_COMBINATION; + } - if (core == OnlyChangeMask) { - core = thread->GetIdealCore(); - } else if (core >= Core::NUM_CPU_CORES && core != static_cast<u32>(-1)) { - LOG_ERROR(Kernel_SVC, "Invalid core specified, got {}", core); - return ERR_INVALID_PROCESSOR_ID; + if (core < Core::NUM_CPU_CORES) { + if ((affinity_mask & (1ULL << core)) == 0) { + LOG_ERROR(Kernel_SVC, + "Core is not enabled for the current mask, core={}, mask={:016X}", core, + affinity_mask); + return ERR_INVALID_COMBINATION; + } + } else if (core != static_cast<u32>(THREADPROCESSORID_DONT_CARE) && + core != static_cast<u32>(THREADPROCESSORID_DONT_UPDATE)) { + LOG_ERROR(Kernel_SVC, "Invalid processor ID specified (core={}).", core); + return ERR_INVALID_PROCESSOR_ID; + } } - // Error out if the input core isn't enabled in the input mask. - if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) { - LOG_ERROR(Kernel_SVC, "Core is not enabled for the current mask, core={}, mask={:016X}", - core, mask); - return ERR_INVALID_COMBINATION; + const auto& handle_table = current_process->GetHandleTable(); + const SharedPtr<Thread> thread = handle_table.Get<Thread>(thread_handle); + if (!thread) { + LOG_ERROR(Kernel_SVC, "Thread handle does not exist, thread_handle=0x{:08X}", + thread_handle); + return ERR_INVALID_HANDLE; } - thread->ChangeCore(core, mask); - + thread->ChangeCore(core, affinity_mask); return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index f07332f02..b4b9cda7c 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -30,12 +30,21 @@ enum ThreadPriority : u32 { }; enum ThreadProcessorId : s32 { - THREADPROCESSORID_IDEAL = -2, ///< Run thread on the ideal core specified by the process. - THREADPROCESSORID_0 = 0, ///< Run thread on core 0 - THREADPROCESSORID_1 = 1, ///< Run thread on core 1 - THREADPROCESSORID_2 = 2, ///< Run thread on core 2 - THREADPROCESSORID_3 = 3, ///< Run thread on core 3 - THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this + /// Indicates that no particular processor core is preferred. + THREADPROCESSORID_DONT_CARE = -1, + + /// Run thread on the ideal core specified by the process. + THREADPROCESSORID_IDEAL = -2, + + /// Indicates that the preferred processor ID shouldn't be updated in + /// a core mask setting operation.
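+ /// (i.e. only the affinity mask changes; the currently set ideal core is kept).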
+ THREADPROCESSORID_DONT_UPDATE = -3, + + THREADPROCESSORID_0 = 0, ///< Run thread on core 0 + THREADPROCESSORID_1 = 1, ///< Run thread on core 1 + THREADPROCESSORID_2 = 2, ///< Run thread on core 2 + THREADPROCESSORID_3 = 3, ///< Run thread on core 3 + THREADPROCESSORID_MAX = 4, ///< Processor ID must be less than this /// Allowed CPU mask THREADPROCESSORID_DEFAULT_MASK = (1 << THREADPROCESSORID_0) | (1 << THREADPROCESSORID_1) | diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index e69f6cf7f..75db0c2dc 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -8,6 +8,7 @@ #include "audio_core/audio_renderer.h" #include "common/alignment.h" +#include "common/bit_util.h" #include "common/common_funcs.h" #include "common/logging/log.h" #include "common/string_util.h" @@ -262,64 +263,304 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) { OpenAudioRendererImpl(ctx); } +static u64 CalculateNumPerformanceEntries(const AudioCore::AudioRendererParameter& params) { + // +1 represents the final mix. + return u64{params.effect_count} + params.submix_count + params.sink_count + params.voice_count + + 1; +} + void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { - IPC::RequestParser rp{ctx}; - auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); LOG_DEBUG(Service_Audio, "called"); - u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); - buffer_sz += params.submix_count * 1024; - buffer_sz += 0x940 * (params.submix_count + 1); - buffer_sz += 0x3F0 * params.voice_count; - buffer_sz += Common::AlignUp(8 * (params.submix_count + 1), 0x10); - buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); - buffer_sz += Common::AlignUp( - (0x3C0 * (params.sink_count + params.submix_count) + 4 * params.sample_count) * - (params.mix_buffer_count + 6), - 0x40); - - if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - const u32 count = params.submix_count + 1; - u64 node_count = Common::AlignUp(count, 0x40); - const u64 node_state_buffer_sz = - 4 * (node_count * node_count) + 0xC * node_count + 2 * (node_count / 8); - u64 edge_matrix_buffer_sz = 0; - node_count = Common::AlignUp(count * count, 0x40); - if (node_count >> 31 != 0) { - edge_matrix_buffer_sz = (node_count | 7) / 8; - } else { - edge_matrix_buffer_sz = node_count / 8; + // Several calculations below align the sizes being calculated + // onto a 64 byte boundary. + static constexpr u64 buffer_alignment_size = 64; + + // Some calculations that compute portions of the buffer + // that will contain information, on the other hand, align + // the result of some of their calculations on a 16 byte boundary. + static constexpr u64 info_field_alignment_size = 16; + + // Maximum detail entries that may exist at one time for performance + // frame statistics. + static constexpr u64 max_perf_detail_entries = 100; + + // Size of the data structure representing the bulk of the voice-related state. + static constexpr u64 voice_state_size = 0x100; + + // Size of the upsampler manager data structure. + constexpr u64 upsampler_manager_size = 0x48; + + // Calculates the part of the size that relates to mix buffers. + const auto calculate_mix_buffer_sizes = [](const AudioCore::AudioRendererParameter& params) { + // As of 8.0.0 this is the maximum on voice channels.
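+ // (six channels is enough for 5.1 surround output).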
+ constexpr u64 max_voice_channels = 6; + + // The service expects the sample_count member of the parameters to either be + // a value of 160 or 240, so the maximum sample count is assumed in order + // to adequately handle all values at runtime. + constexpr u64 default_max_sample_count = 240; + + const u64 total_mix_buffers = params.mix_buffer_count + max_voice_channels; + + u64 size = 0; + size += total_mix_buffers * (sizeof(s32) * params.sample_count); + size += total_mix_buffers * (sizeof(s32) * default_max_sample_count); + size += u64{params.submix_count} + params.sink_count; + size = Common::AlignUp(size, buffer_alignment_size); + size += Common::AlignUp(params.unknown_30, buffer_alignment_size); + size += Common::AlignUp(sizeof(s32) * params.mix_buffer_count, buffer_alignment_size); + return size; + }; + + // Calculates the portion of the size related to the mix data (and the sorting thereof). + const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) { + // The size of the mixing info data structure. + constexpr u64 mix_info_size = 0x940; + + // Consists of total submixes with the final mix included. + const u64 total_mix_count = u64{params.submix_count} + 1; + + // The total number of effects that may be available to the audio renderer at any time. + constexpr u64 max_effects = 256; + + // Calculates the part of the size related to the audio node state. + // This will only be used if the audio revision supports the splitter. + const auto calculate_node_state_size = [](std::size_t num_nodes) { + // Internally within a nodestate, it appears to use a data structure + // similar to a std::bitset<64> twice. + constexpr u64 bit_size = Common::BitSize<u64>(); + constexpr u64 num_bitsets = 2; + + // Node state instances have three states internally for performing + // depth-first searches of nodes. Initialized, Found, and Done Sorting. + constexpr u64 num_states = 3; + + u64 size = 0; + size += (num_nodes * num_nodes) * sizeof(s32); + size += num_states * (num_nodes * sizeof(s32)); + size += num_bitsets * (Common::AlignUp(num_nodes, bit_size) / Common::BitSize<u8>()); + return size; + }; + + // Calculates the part of the size related to the adjacency (aka edge) matrix. 
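+ // Each node-to-node edge is stored as a full s32, so the matrix holds N * N entries.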
+ const auto calculate_edge_matrix_size = [](std::size_t num_nodes) { + return (num_nodes * num_nodes) * sizeof(s32); + }; + + u64 size = 0; + size += Common::AlignUp(sizeof(void*) * total_mix_count, info_field_alignment_size); + size += Common::AlignUp(mix_info_size * total_mix_count, info_field_alignment_size); + size += Common::AlignUp(sizeof(s32) * max_effects * params.submix_count, + info_field_alignment_size); + + if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { + size += Common::AlignUp(calculate_node_state_size(total_mix_count) + + calculate_edge_matrix_size(total_mix_count), + info_field_alignment_size); } - buffer_sz += Common::AlignUp(node_state_buffer_sz + edge_matrix_buffer_sz, 0x10); - } - buffer_sz += 0x20 * (params.effect_count + 4 * params.voice_count) + 0x50; - if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { - buffer_sz += 0xE0 * params.num_splitter_send_channels; - buffer_sz += 0x20 * params.splitter_count; - buffer_sz += Common::AlignUp(4 * params.num_splitter_send_channels, 0x10); - } - buffer_sz = Common::AlignUp(buffer_sz, 0x40) + 0x170 * params.sink_count; - u64 output_sz = buffer_sz + 0x280 * params.sink_count + 0x4B0 * params.effect_count + - ((params.voice_count * 256) | 0x40); - - if (params.performance_frame_count >= 1) { - output_sz = Common::AlignUp(((16 * params.sink_count + 16 * params.effect_count + - 16 * params.voice_count + 16) + - 0x658) * - (params.performance_frame_count + 1) + - 0xc0, - 0x40) + - output_sz; - } - output_sz = Common::AlignUp(output_sz + 0x1807e, 0x1000); + return size; + }; - IPC::ResponseBuilder rb{ctx, 4}; + // Calculates the part of the size related to voice channel info. + const auto calculate_voice_info_size = [](const AudioCore::AudioRendererParameter& params) { + constexpr u64 voice_info_size = 0x220; + constexpr u64 voice_resource_size = 0xD0; + + u64 size = 0; + size += Common::AlignUp(sizeof(void*) * params.voice_count, info_field_alignment_size); + size += Common::AlignUp(voice_info_size * params.voice_count, info_field_alignment_size); + size += + Common::AlignUp(voice_resource_size * params.voice_count, info_field_alignment_size); + size += Common::AlignUp(voice_state_size * params.voice_count, info_field_alignment_size); + return size; + }; + + // Calculates the part of the size related to memory pools. + const auto calculate_memory_pools_size = [](const AudioCore::AudioRendererParameter& params) { + const u64 num_memory_pools = sizeof(s32) * (u64{params.effect_count} + params.voice_count); + const u64 memory_pool_info_size = 0x20; + return Common::AlignUp(num_memory_pools * memory_pool_info_size, info_field_alignment_size); + }; + + // Calculates the part of the size related to the splitter context. + const auto calculate_splitter_context_size = + [this](const AudioCore::AudioRendererParameter& params) -> u64 { + if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { + return 0; + } + + constexpr u64 splitter_info_size = 0x20; + constexpr u64 splitter_destination_data_size = 0xE0; + + u64 size = 0; + size += params.num_splitter_send_channels; + size += + Common::AlignUp(splitter_info_size * params.splitter_count, info_field_alignment_size); + size += Common::AlignUp(splitter_destination_data_size * params.num_splitter_send_channels, + info_field_alignment_size); + + return size; + }; + + // Calculates the part of the size related to the upsampler info. 
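+ // One upsampler info instance is accounted for per submix and per sink.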
+ const auto calculate_upsampler_info_size = [](const AudioCore::AudioRendererParameter& params) { + constexpr u64 upsampler_info_size = 0x280; + // Yes, using the buffer size over info alignment size is intentional here. + return Common::AlignUp(upsampler_info_size * (u64{params.submix_count} + params.sink_count), + buffer_alignment_size); + }; + + // Calculates the part of the size related to effect info. + const auto calculate_effect_info_size = [](const AudioCore::AudioRendererParameter& params) { + constexpr u64 effect_info_size = 0x2B0; + return Common::AlignUp(effect_info_size * params.effect_count, info_field_alignment_size); + }; + + // Calculates the part of the size related to audio sink info. + const auto calculate_sink_info_size = [](const AudioCore::AudioRendererParameter& params) { + const u64 sink_info_size = 0x170; + return Common::AlignUp(sink_info_size * params.sink_count, info_field_alignment_size); + }; + + // Calculates the part of the size related to voice state info. + const auto calculate_voice_state_size = [](const AudioCore::AudioRendererParameter& params) { + const u64 voice_state_size = 0x100; + const u64 additional_size = buffer_alignment_size - 1; + return Common::AlignUp(voice_state_size * params.voice_count + additional_size, + info_field_alignment_size); + }; + + // Calculates the part of the size related to performance statistics. + const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) { + // Extra size value appended to the end of the calculation. + constexpr u64 appended = 128; + + // Whether or not we assume the newer version of performance metrics data structures. + const bool is_v2 = + IsFeatureSupported(AudioFeatures::PerformanceMetricsVersion2, params.revision); + + // Data structure sizes + constexpr u64 perf_statistics_size = 0x0C; + const u64 header_size = is_v2 ? 0x30 : 0x18; + const u64 entry_size = is_v2 ? 0x18 : 0x10; + const u64 detail_size = is_v2 ? 0x18 : 0x10; + + const u64 entry_count = CalculateNumPerformanceEntries(params); + const u64 size_per_frame = + header_size + (entry_size * entry_count) + (detail_size * max_perf_detail_entries); + + u64 size = 0; + size += Common::AlignUp(size_per_frame * params.performance_frame_count + 1, + buffer_alignment_size); + size += Common::AlignUp(perf_statistics_size, buffer_alignment_size); + size += appended; + return size; + }; + + // Calculates the part of the size that relates to the audio command buffer. + const auto calculate_command_buffer_size = + [this](const AudioCore::AudioRendererParameter& params) { + constexpr u64 alignment = (buffer_alignment_size - 1) * 2; + + if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { + constexpr u64 command_buffer_size = 0x18000; + + return command_buffer_size + alignment; + } + + // When the variadic command buffer is supported, this means + // the command generator for the audio renderer can issue commands + // that are (as one would expect), variable in size. So what we need to do + // is determine the maximum possible size for a few command data structures + // then multiply them by the amount of present commands indicated by the given + // respective audio parameters. 
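+ // Each per-command size below is therefore a worst case; multiplying it by the + // matching count from the parameters yields an upper bound for the command buffer.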
+ + constexpr u64 max_biquad_filters = 2; + constexpr u64 max_mix_buffers = 24; + + constexpr u64 biquad_filter_command_size = 0x2C; + + constexpr u64 depop_mix_command_size = 0x24; + constexpr u64 depop_setup_command_size = 0x50; + + constexpr u64 effect_command_max_size = 0x540; + + constexpr u64 mix_command_size = 0x1C; + constexpr u64 mix_ramp_command_size = 0x24; + constexpr u64 mix_ramp_grouped_command_size = 0x13C; + + constexpr u64 perf_command_size = 0x28; + + constexpr u64 sink_command_size = 0x130; + + constexpr u64 submix_command_max_size = + depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; + + constexpr u64 volume_command_size = 0x1C; + constexpr u64 volume_ramp_command_size = 0x20; + + constexpr u64 voice_biquad_filter_command_size = + biquad_filter_command_size * max_biquad_filters; + constexpr u64 voice_data_command_size = 0x9C; + const u64 voice_command_max_size = + (params.splitter_count * depop_setup_command_size) + + (voice_data_command_size + voice_biquad_filter_command_size + + volume_ramp_command_size + mix_ramp_grouped_command_size); + + // Now calculate the individual elements that comprise the size and add them together. + const u64 effect_commands_size = params.effect_count * effect_command_max_size; + + const u64 final_mix_commands_size = + depop_mix_command_size + volume_command_size * max_mix_buffers; + const u64 perf_commands_size = + perf_command_size * + (CalculateNumPerformanceEntries(params) + max_perf_detail_entries); + + const u64 sink_commands_size = params.sink_count * sink_command_size; + + const u64 splitter_commands_size = + params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; + + const u64 submix_commands_size = params.submix_count * submix_command_max_size; + + const u64 voice_commands_size = params.voice_count * voice_command_max_size; + + return effect_commands_size + final_mix_commands_size + perf_commands_size + + sink_commands_size + splitter_commands_size + submix_commands_size + + voice_commands_size + alignment; + }; + + IPC::RequestParser rp{ctx}; + const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); + + u64 size = 0; + size += calculate_mix_buffer_sizes(params); + size += calculate_mix_info_size(params); + size += calculate_voice_info_size(params); + size += upsampler_manager_size; + size += calculate_memory_pools_size(params); + size += calculate_splitter_context_size(params); + + size = Common::AlignUp(size, buffer_alignment_size); + + size += calculate_upsampler_info_size(params); + size += calculate_effect_info_size(params); + size += calculate_sink_info_size(params); + size += calculate_voice_state_size(params); + size += calculate_perf_size(params); + size += calculate_command_buffer_size(params); + + // finally, 4KB page align the size, and we're done. 
+ size = Common::AlignUp(size, 4096); + + IPC::ResponseBuilder rb{ctx, 4}; rb.Push(RESULT_SUCCESS); - rb.Push<u64>(output_sz); + rb.Push<u64>(size); - LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz); + LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", size); } void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { @@ -357,10 +598,15 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { } bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { - u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap + // Byte swap + const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); + switch (feature) { case AudioFeatures::Splitter: - return version_num >= 2u; + return version_num >= 2U; + case AudioFeatures::PerformanceMetricsVersion2: + case AudioFeatures::VariadicCommandBuffer: + return version_num >= 5U; default: return false; } diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index e55d25973..1d3c8df61 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h @@ -28,6 +28,8 @@ private: enum class AudioFeatures : u32 { Splitter, + PerformanceMetricsVersion2, + VariadicCommandBuffer, }; bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 036e66f05..3175579cc 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -40,6 +40,13 @@ bool DmaPusher::Step() { } const CommandList& command_list{dma_pushbuffer.front()}; + ASSERT_OR_EXECUTE(!command_list.empty(), { + // Somehow the command_list is empty. In order to avoid a crash, + // we ignore it and assume its size is 0.
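+ // (The empty list is popped and the subindex reset, and Step() reports success.)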
+ dma_pushbuffer.pop(); + dma_pushbuffer_subindex = 0; + return true; + }); const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; GPUVAddr dma_get = command_list_header.addr; GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 48e4fec33..f342c78e6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -59,6 +59,7 @@ public: static constexpr std::size_t NumCBData = 16; static constexpr std::size_t NumVertexArrays = 32; static constexpr std::size_t NumVertexAttributes = 32; + static constexpr std::size_t NumVaryings = 31; static constexpr std::size_t NumTextureSamplers = 32; static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t MaxShaderProgram = 6; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e5b4eadea..7bbc556da 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -98,6 +98,10 @@ union Attribute { BitField<22, 2, u64> element; BitField<24, 6, Index> index; BitField<47, 3, AttributeSize> size; + + bool IsPhysical() const { + return element == 0 && static_cast<u64>(index.Value()) == 0; + } } fmt20; union { @@ -499,6 +503,11 @@ enum class SystemVariable : u64 { CircularQueueEntryAddressHigh = 0x63, }; +enum class PhysicalAttributeDirection : u64 { + Input = 0, + Output = 1, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -587,6 +596,7 @@ union Instruction { } alu; union { + BitField<38, 1, u64> idx; BitField<51, 1, u64> saturate; BitField<52, 2, IpaSampleMode> sample_mode; BitField<54, 2, IpaInterpMode> interp_mode; @@ -812,6 +822,12 @@ union Instruction { } stg; union { + BitField<32, 1, PhysicalAttributeDirection> direction; + BitField<47, 3, AttributeSize> size; + BitField<20, 11, u64> address; + } al2p; + + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; BitField<7, 1, u64> abs_a; @@ -1374,8 +1390,9 @@ public: ST_A, ST_L, ST_S, - LDG, // Load from global memory - STG, // Store in global memory + LDG, // Load from global memory + STG, // Store in global memory + AL2P, // Transforms attribute memory into physical memory TEX, TEX_B, // Texture Load Bindless TXQ, // Texture Query @@ -1646,6 +1663,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cdf86f562..05a168a72 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -81,12 +81,6 @@ struct CommandDataContainer { CommandDataContainer(CommandData&& data, u64 next_fence) : data{std::move(data)}, fence{next_fence} {} - CommandDataContainer& operator=(const CommandDataContainer& t) { - data = std::move(t.data); - fence = t.fence; - return *this; - } - CommandData data; u64 fence{}; }; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index fbea107ca..c766ed692 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -120,7 +120,9 @@ 
bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { // An instruction with the Exit flag will not actually // cause an exit if it's executed inside a delay slot. - if (opcode.is_exit && !is_delay_slot) { + // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further + // testing on the MME code. + if (opcode.is_exit) { // Exit has a delay slot, execute the next instruction Step(offset, true); return false; diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index f820f3ed9..0c4ea1494 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -144,8 +144,9 @@ protected: object->SetIsRegistered(false); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); + const CacheAddr addr = object->GetCacheAddr(); interval_cache.subtract({GetInterval(object), ObjectSet{object}}); - map_cache.erase(object->GetCacheAddr()); + map_cache.erase(addr); } /// Returns a ticks counter used for tracking when cached objects were last modified diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b6d9e0ddb..38497678a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -21,9 +21,18 @@ T GetInteger(GLenum pname) { Device::Device() { uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); + max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); } +Device::Device(std::nullptr_t) { + uniform_buffer_alignment = 0; + max_vertex_attributes = 16; + max_varyings = 15; + has_variable_aoffi = true; +} + bool Device::TestVariableAoffi() { const GLchar* AOFFI_TEST = R"(#version 430 core uniform sampler2D tex; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 78ff5ee58..de8490682 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -5,17 +5,27 @@ #pragma once #include <cstddef> +#include "common/common_types.h" namespace OpenGL { class Device { public: - Device(); + explicit Device(); + explicit Device(std::nullptr_t); std::size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + u32 GetMaxVertexAttributes() const { + return max_vertex_attributes; + } + + u32 GetMaxVaryings() const { + return max_varyings; + } + bool HasVariableAoffi() const { return has_variable_aoffi; } @@ -24,6 +34,8 @@ private: static bool TestVariableAoffi(); std::size_t uniform_buffer_alignment{}; + u32 max_vertex_attributes{}; + u32 max_varyings{}; bool has_variable_aoffi{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 1a62795e1..4bff54a59 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -134,6 +134,19 @@ bool IsPrecise(Node node) { return false; } +constexpr bool IsGenericAttribute(Attribute::Index index) { + return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; +} + +constexpr Attribute::Index ToGenericAttribute(u32 value) { + return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0)); +} + +u32 GetGenericAttributeIndex(Attribute::Index index) { + ASSERT(IsGenericAttribute(index)); + return 
static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); +} + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -152,6 +165,7 @@ public: DeclareConstantBuffers(); DeclareGlobalMemory(); DeclareSamplers(); + DeclarePhysicalAttributeReader(); code.AddLine("void execute_" + suffix + "() {"); ++code.scope; @@ -296,76 +310,95 @@ private: } std::string GetInputFlags(AttributeUse attribute) { - std::string out; - switch (attribute) { - case AttributeUse::Constant: - out += "flat "; - break; - case AttributeUse::ScreenLinear: - out += "noperspective "; - break; case AttributeUse::Perspective: // Default, Smooth - break; + return {}; + case AttributeUse::Constant: + return "flat "; + case AttributeUse::ScreenLinear: + return "noperspective "; default: - LOG_CRITICAL(HW_GPU, "Unused attribute being fetched"); - UNREACHABLE(); + case AttributeUse::Unused: + UNREACHABLE_MSG("Unused attribute being fetched"); + return {}; + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); + return {}; } - return out; } void DeclareInputAttributes() { - const auto& attributes = ir.GetInputAttributes(); - for (const auto element : attributes) { - const Attribute::Index index = element.first; - if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { - // Skip when it's not a generic attribute - continue; - } - - // TODO(bunnei): Use proper number of elements for these - u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - idx += GENERIC_VARYING_START_LOCATION; + if (ir.HasPhysicalAttributes()) { + const u32 num_inputs{GetNumPhysicalInputAttributes()}; + for (u32 i = 0; i < num_inputs; ++i) { + DeclareInputAttribute(ToGenericAttribute(i), true); } + code.AddNewLine(); + return; + } - std::string attr = GetInputAttribute(index); - if (stage == ShaderStage::Geometry) { - attr = "gs_" + attr + "[]"; - } - std::string suffix; - if (stage == ShaderStage::Fragment) { - const auto input_mode = - header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION); - suffix = GetInputFlags(input_mode); + const auto& attributes = ir.GetInputAttributes(); + for (const auto index : attributes) { + if (IsGenericAttribute(index)) { + DeclareInputAttribute(index, false); } - code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " + - attr + ';'); } if (!attributes.empty()) code.AddNewLine(); } + void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { + const u32 generic_index{GetGenericAttributeIndex(index)}; + + std::string name{GetInputAttribute(index)}; + if (stage == ShaderStage::Geometry) { + name = "gs_" + name + "[]"; + } + + std::string suffix; + if (stage == ShaderStage::Fragment) { + const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + if (skip_unused && input_mode == AttributeUse::Unused) { + return; + } + suffix = GetInputFlags(input_mode); + } + + u32 location = generic_index; + if (stage != ShaderStage::Vertex) { + // If inputs are varyings, add an offset + location += GENERIC_VARYING_START_LOCATION; + } + + code.AddLine("layout (location = " + std::to_string(location) + ") " + suffix + "in vec4 " + + name + ';'); + } + void DeclareOutputAttributes() { + if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { + for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { + 
DeclareOutputAttribute(ToGenericAttribute(i)); + } + code.AddNewLine(); + return; + } + const auto& attributes = ir.GetOutputAttributes(); for (const auto index : attributes) { - if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { - // Skip when it's not a generic attribute - continue; + if (IsGenericAttribute(index)) { + DeclareOutputAttribute(index); } - // TODO(bunnei): Use proper number of elements for these - const auto idx = static_cast<u32>(index) - - static_cast<u32>(Attribute::Index::Attribute_0) + - GENERIC_VARYING_START_LOCATION; - code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + - GetOutputAttribute(index) + ';'); } if (!attributes.empty()) code.AddNewLine(); } + void DeclareOutputAttribute(Attribute::Index index) { + const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + code.AddLine("layout (location = " + std::to_string(location) + ") out vec4 " + + GetOutputAttribute(index) + ';'); + } + void DeclareConstantBuffers() { for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; @@ -429,6 +462,39 @@ private: code.AddNewLine(); } + void DeclarePhysicalAttributeReader() { + if (!ir.HasPhysicalAttributes()) { + return; + } + code.AddLine("float readPhysicalAttribute(uint physical_address) {"); + ++code.scope; + code.AddLine("switch (physical_address) {"); + + // Just declare generic attributes for now. + const auto num_attributes{static_cast<u32>(GetNumPhysicalInputAttributes())}; + for (u32 index = 0; index < num_attributes; ++index) { + const auto attribute{ToGenericAttribute(index)}; + for (u32 element = 0; element < 4; ++element) { + constexpr u32 generic_base{0x80}; + constexpr u32 generic_stride{16}; + constexpr u32 element_stride{4}; + const u32 address{generic_base + index * generic_stride + element * element_stride}; + + const bool declared{stage != ShaderStage::Fragment || + header.ps.GetAttributeUse(index) != AttributeUse::Unused}; + const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; + code.AddLine(fmt::format("case 0x{:x}: return {};", address, value)); + } + } + + code.AddLine("default: return 0;"); + + code.AddLine('}'); + --code.scope; + code.AddLine('}'); + code.AddNewLine(); + } + void VisitBlock(const NodeBlock& bb) { for (const Node node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { @@ -483,70 +549,12 @@ private: return value; } else if (const auto abuf = std::get_if<AbufNode>(node)) { - const auto attribute = abuf->GetIndex(); - const auto element = abuf->GetElement(); - - const auto GeometryPass = [&](const std::string& name) { - if (stage == ShaderStage::Geometry && abuf->GetBuffer()) { - // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games - // set an 0x80000000 index for those and the shader fails to build. Find out why - // this happens and what's its intent. - return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) + - ") % MAX_VERTEX_INPUT]"; - } - return name; - }; - - switch (attribute) { - case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { - return element == 3 ? 
"1.0f" : "gl_FragCoord" + GetSwizzle(element); - } - case Attribute::Index::PointCoord: - switch (element) { - case 0: - return "gl_PointCoord.x"; - case 1: - return "gl_PointCoord.y"; - case 2: - case 3: - return "0"; - } - UNREACHABLE(); - return "0"; - case Attribute::Index::TessCoordInstanceIDVertexID: - // TODO(Subv): Find out what the values are for the first two elements when inside a - // vertex shader, and what's the value of the fourth element when inside a Tess Eval - // shader. - ASSERT(stage == ShaderStage::Vertex); - switch (element) { - case 2: - // Config pack's first value is instance_id. - return "uintBitsToFloat(config_pack[0])"; - case 3: - return "uintBitsToFloat(gl_VertexID)"; - } - UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); - return "0"; - case Attribute::Index::FrontFacing: - // TODO(Subv): Find out what the values are for the other elements. - ASSERT(stage == ShaderStage::Fragment); - switch (element) { - case 3: - return "itof(gl_FrontFacing ? -1 : 0)"; - } - UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); - return "0"; - default: - if (attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31) { - return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); - } - break; + UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, + "Physical attributes in geometry shaders are not implemented"); + if (abuf->IsPhysicalBuffer()) { + return "readPhysicalAttribute(ftou(" + Visit(abuf->GetPhysicalAddress()) + "))"; } - UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); } else if (const auto cbuf = std::get_if<CbufNode>(node)) { const Node offset = cbuf->GetOffset(); @@ -598,6 +606,69 @@ private: return {}; } + std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { + const auto GeometryPass = [&](std::string name) { + if (stage == ShaderStage::Geometry && buffer) { + // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games + // set an 0x80000000 index for those and the shader fails to build. Find out why + // this happens and what's its intent. + return "gs_" + std::move(name) + "[ftou(" + Visit(buffer) + ") % MAX_VERTEX_INPUT]"; + } + return name; + }; + + switch (attribute) { + case Attribute::Index::Position: + if (stage != ShaderStage::Fragment) { + return GeometryPass("position") + GetSwizzle(element); + } else { + return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); + } + case Attribute::Index::PointCoord: + switch (element) { + case 0: + return "gl_PointCoord.x"; + case 1: + return "gl_PointCoord.y"; + case 2: + case 3: + return "0"; + } + UNREACHABLE(); + return "0"; + case Attribute::Index::TessCoordInstanceIDVertexID: + // TODO(Subv): Find out what the values are for the first two elements when inside a + // vertex shader, and what's the value of the fourth element when inside a Tess Eval + // shader. + ASSERT(stage == ShaderStage::Vertex); + switch (element) { + case 2: + // Config pack's first value is instance_id. + return "uintBitsToFloat(config_pack[0])"; + case 3: + return "uintBitsToFloat(gl_VertexID)"; + } + UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); + return "0"; + case Attribute::Index::FrontFacing: + // TODO(Subv): Find out what the values are for the other elements. 
+ ASSERT(stage == ShaderStage::Fragment); + switch (element) { + case 3: + return "itof(gl_FrontFacing ? -1 : 0)"; + } + UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); + return "0"; + default: + if (IsGenericAttribute(attribute)) { + return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); + } + break; + } + UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); + return "0"; + } + std::string ApplyPrecise(Operation operation, const std::string& value) { if (!IsPrecise(operation)) { return value; @@ -833,6 +904,8 @@ private: target = GetRegister(gpr->GetIndex()); } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); + target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: @@ -844,8 +917,7 @@ private: case Attribute::Index::ClipDistances4567: return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']'; default: - if (attribute >= Attribute::Index::Attribute_0 && - attribute <= Attribute::Index::Attribute_31) { + if (IsGenericAttribute(attribute)) { return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", @@ -1591,15 +1663,11 @@ private: } std::string GetInputAttribute(Attribute::Index attribute) const { - const auto index{static_cast<u32>(attribute) - - static_cast<u32>(Attribute::Index::Attribute_0)}; - return GetDeclarationWithSuffix(index, "input_attr"); + return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); } std::string GetOutputAttribute(Attribute::Index attribute) const { - const auto index{static_cast<u32>(attribute) - - static_cast<u32>(Attribute::Index::Attribute_0)}; - return GetDeclarationWithSuffix(index, "output_attr"); + return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); } std::string GetConstBuffer(u32 index) const { @@ -1640,6 +1708,19 @@ private: return name + '_' + std::to_string(index) + '_' + suffix; } + u32 GetNumPhysicalInputAttributes() const { + return stage == ShaderStage::Vertex ? 
GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); + } + + u32 GetNumPhysicalAttributes() const { + return std::min<u32>(device.GetMaxVertexAttributes(), Maxwell::NumVertexAttributes); + } + + u32 GetNumPhysicalVaryings() const { + return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, + Maxwell::NumVaryings); + } + const Device& device; const ShaderIR& ir; const ShaderStage stage; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 254c0d499..fba9c594a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -104,8 +104,9 @@ bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { return true; } -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) - : system{system}, precompiled_cache_virtual_file_offset{0} {} +ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} + +ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> ShaderDiskCacheOpenGL::LoadTransferable() { @@ -243,7 +244,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - const auto entry = LoadDecompiledEntry(); + auto entry = LoadDecompiledEntry(); if (!entry) { return {}; } @@ -287,13 +288,13 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn return {}; } - std::vector<u8> code(code_size); + std::string code(code_size, '\0'); if (!LoadArrayFromPrecompiled(code.data(), code.size())) { return {}; } ShaderDiskCacheDecompiled entry; - entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size); + entry.code = std::move(code); u32 const_buffers_count{}; if (!LoadObjectFromPrecompiled(const_buffers_count)) { @@ -303,12 +304,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < const_buffers_count; ++i) { u32 max_offset{}; u32 index{}; - u8 is_indirect{}; + bool is_indirect{}; if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(is_indirect)) { return {}; } - entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index); + entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); } u32 samplers_count{}; @@ -320,18 +321,17 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u64 offset{}; u64 index{}; u32 type{}; - u8 is_array{}; - u8 is_shadow{}; - u8 is_bindless{}; + bool is_array{}; + bool is_shadow{}; + bool is_bindless{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { return {}; } - entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset), - static_cast<std::size_t>(index), - static_cast<Tegra::Shader::TextureType>(type), - is_array != 0, is_shadow != 0, is_bindless != 0); + entry.entries.samplers.emplace_back( + static_cast<std::size_t>(offset), static_cast<std::size_t>(index), + static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); } u32 global_memory_count{}; @@ -342,21 +342,20 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn for (u32 i = 0; i < global_memory_count; ++i) { u32 
cbuf_index{}; u32 cbuf_offset{}; - u8 is_read{}; - u8 is_written{}; + bool is_read{}; + bool is_written{}; if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { return {}; } - entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0, - is_written != 0); + entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, + is_written); } for (auto& clip_distance : entry.entries.clip_distances) { - u8 clip_distance_raw{}; - if (!LoadObjectFromPrecompiled(clip_distance_raw)) + if (!LoadObjectFromPrecompiled(clip_distance)) { return {}; - clip_distance = clip_distance_raw != 0; + } } u64 shader_length{}; @@ -384,7 +383,7 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: for (const auto& cbuf : entries.const_buffers) { if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || - !SaveObjectToPrecompiled(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0))) { + !SaveObjectToPrecompiled(cbuf.IsIndirect())) { return false; } } @@ -396,9 +395,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsArray() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(sampler.IsBindless() ? 1 : 0))) { + !SaveObjectToPrecompiled(sampler.IsArray()) || + !SaveObjectToPrecompiled(sampler.IsShadow()) || + !SaveObjectToPrecompiled(sampler.IsBindless())) { return false; } } @@ -409,14 +408,13 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: for (const auto& gmem : entries.global_memory_entries) { if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || - !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(gmem.IsWritten() ? 1 : 0))) { + !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { return false; } } for (const bool clip_distance : entries.clip_distances) { - if (!SaveObjectToPrecompiled(static_cast<u8>(clip_distance ? 
1 : 0))) { + if (!SaveObjectToPrecompiled(clip_distance)) { return false; } } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 0142b2e3b..2da0a4a23 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -70,14 +70,14 @@ namespace std { template <> struct hash<OpenGL::BaseBindings> { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const { + std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; } }; template <> struct hash<OpenGL::ShaderDiskCacheUsage> { - std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const { + std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { return static_cast<std::size_t>(usage.unique_identifier) ^ std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16; } @@ -162,6 +162,7 @@ struct ShaderDiskCacheDump { class ShaderDiskCacheOpenGL { public: explicit ShaderDiskCacheOpenGL(Core::System& system); + ~ShaderDiskCacheOpenGL(); /// Loads transferable cache. If file has a old version or on failure, it deletes the file. std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> @@ -259,20 +260,35 @@ private: return SaveArrayToPrecompiled(&object, 1); } + bool SaveObjectToPrecompiled(bool object) { + const auto value = static_cast<u8>(object); + return SaveArrayToPrecompiled(&value, 1); + } + template <typename T> bool LoadObjectFromPrecompiled(T& object) { return LoadArrayFromPrecompiled(&object, 1); } - // Copre system + bool LoadObjectFromPrecompiled(bool& object) { + u8 value; + const bool read_ok = LoadArrayFromPrecompiled(&value, 1); + if (!read_ok) { + return false; + } + + object = value != 0; + return true; + } + + // Core system Core::System& system; // Stored transferable shaders std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; - // Stores whole precompiled cache which will be read from or saved to the precompiled chache - // file + // Stores whole precompiled cache which will be read from/saved to the precompiled cache file FileSys::VectorVfsFile precompiled_cache_virtual_file; // Stores the current offset of the precompiled cache file for IO purposes - std::size_t precompiled_cache_virtual_file_offset; + std::size_t precompiled_cache_virtual_file_offset = 0; // The cache has been loaded at boot bool tried_to_load{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a11000f6b..b61a6d170 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -194,8 +194,8 @@ public: for (const auto& sampler : ir.GetSamplers()) { entries.samplers.emplace_back(sampler); } - for (const auto& attr : ir.GetInputAttributes()) { - entries.attributes.insert(GetGenericAttributeLocation(attr.first)); + for (const auto& attribute : ir.GetInputAttributes()) { + entries.attributes.insert(GetGenericAttributeLocation(attribute)); } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -321,8 +321,7 @@ private: } void DeclareInputAttributes() { - for (const auto element : ir.GetInputAttributes()) { - const Attribute::Index index = element.first; + for (const auto index : ir.GetInputAttributes()) { if 
(!IsGenericAttribute(index)) { continue; } diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea1092db1..6a992c543 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -12,6 +12,8 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/shader_ir.h" +#pragma optimize("", off) + namespace VideoCommon::Shader { using Tegra::Shader::Attribute; @@ -47,17 +49,20 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { "Indirect attribute loads are not supported"); UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, "Unaligned attribute loads are not supported"); + UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && + instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, + "Non-32 bits PHYS reads are not implemented"); - Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass, - Tegra::Shader::IpaSampleMode::Default}; + const Node buffer{GetRegister(instr.gpr39)}; u64 next_element = instr.attribute.fmt20.element; auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); const auto LoadNextElement = [&](u32 reg_offset) { - const Node buffer = GetRegister(instr.gpr39); - const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index), - next_element, input_mode, buffer); + const Node attribute{instr.attribute.fmt20.IsPhysical() + ? GetPhysicalInputAttribute(instr.gpr8, buffer) + : GetInputAttribute(static_cast<Attribute::Index>(next_index), + next_element, buffer)}; SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); @@ -239,6 +244,21 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::AL2P: { + // Ignore al2p.direction since we don't care about it. + + // Calculate emulation fake physical address. + const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; + const Node reg{GetRegister(instr.gpr8)}; + const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; + + // Set the fake address to target register. + SetRegister(bb, instr.gpr0, fake_address); + + // Signal the shader IR to declare all possible attributes and varyings + uses_physical_attributes = true; + break; + } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d750a2936..fa17c45b5 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -130,15 +130,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::IPA: { - const auto& attribute = instr.attribute.fmt28; + const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; + + const auto attribute = instr.attribute.fmt28; const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), instr.ipa.sample_mode.Value()}; - const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode); - Node value = attr; + Node value = is_physical ? 
GetPhysicalInputAttribute(instr.gpr8) + : GetInputAttribute(attribute.index, attribute.element); const Tegra::Shader::Attribute::Index index = attribute.index.Value(); - if (index >= Tegra::Shader::Attribute::Index::Attribute_0 && - index <= Tegra::Shader::Attribute::Index::Attribute_31) { + const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && + index <= Tegra::Shader::Attribute::Index::Attribute_31; + if (is_generic || is_physical) { // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. // In theory by setting them as perspective, OpenGL does the perspective correction. // A way must figured to reverse the last step of it. diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index e4eb0dfd9..153ad1fd0 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -21,6 +21,13 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) + : program_code{program_code}, main_offset{main_offset} { + Decode(); +} + +ShaderIR::~ShaderIR() = default; + Node ShaderIR::StoreNode(NodeData&& node_data) { auto store = std::make_unique<NodeData>(node_data); const Node node = store.get(); @@ -89,13 +96,14 @@ Node ShaderIR::GetPredicate(bool immediate) { return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); } -Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, - const Tegra::Shader::IpaMode& input_mode, Node buffer) { - const auto [entry, is_new] = - used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{})); - entry->second.insert(input_mode); +Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { + used_input_attributes.emplace(index); + return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); +} - return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer)); +Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { + uses_physical_attributes = true; + return StoreNode(AbufNode(GetRegister(physical_address), buffer)); } Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 65f1e1de9..0bf124252 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -328,40 +328,31 @@ struct MetaTexture { u32 element{}; }; -inline constexpr MetaArithmetic PRECISE = {true}; -inline constexpr MetaArithmetic NO_PRECISE = {false}; +constexpr MetaArithmetic PRECISE = {true}; +constexpr MetaArithmetic NO_PRECISE = {false}; using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; /// Holds any kind of operation that can be done in the IR class OperationNode final { public: - template <typename... T> - explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {} + explicit OperationNode(OperationCode code) : code{code} {} - template <typename... T> - explicit constexpr OperationNode(OperationCode code, Meta&& meta) - : code{code}, meta{std::move(meta)} {} + explicit OperationNode(OperationCode code, Meta&& meta) : code{code}, meta{std::move(meta)} {} template <typename... T> - explicit constexpr OperationNode(OperationCode code, const T*... operands) + explicit OperationNode(OperationCode code, const T*... 
operands) : OperationNode(code, {}, operands...) {} template <typename... T> - explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_) - : code{code}, meta{std::move(meta)} { - - auto operands_list = {operands_...}; - for (auto& operand : operands_list) { - operands.push_back(operand); - } - } + explicit OperationNode(OperationCode code, Meta&& meta, const T*... operands_) + : code{code}, meta{std::move(meta)}, operands{operands_...} {} explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) : code{code}, meta{meta}, operands{std::move(operands)} {} explicit OperationNode(OperationCode code, std::vector<Node>&& operands) - : code{code}, meta{}, operands{std::move(operands)} {} + : code{code}, operands{std::move(operands)} {} OperationCode GetCode() const { return code; @@ -465,17 +456,14 @@ private: /// Attribute buffer memory (known as attributes or varyings in GLSL terms) class AbufNode final { public: - explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, - const Tegra::Shader::IpaMode& input_mode, Node buffer = {}) - : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {} - + // Initialize for standard attributes (index is explicit). explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, Node buffer = {}) - : input_mode{}, buffer{buffer}, index{index}, element{element} {} + : buffer{buffer}, index{index}, element{element} {} - Tegra::Shader::IpaMode GetInputMode() const { - return input_mode; - } + // Initialize for physical attributes (index is a variable value). + explicit constexpr AbufNode(Node physical_address, Node buffer = {}) + : physical_address{physical_address}, buffer{buffer} {} Tegra::Shader::Attribute::Index GetIndex() const { return index; @@ -489,11 +477,19 @@ public: return buffer; } + bool IsPhysicalBuffer() const { + return physical_address != nullptr; + } + + Node GetPhysicalAddress() const { + return physical_address; + } + private: - const Tegra::Shader::IpaMode input_mode; - const Node buffer; - const Tegra::Shader::Attribute::Index index; - const u32 element; + Node physical_address{}; + Node buffer{}; + Tegra::Shader::Attribute::Index index{}; + u32 element{}; }; /// Constant buffer node, usually mapped to uniform buffers in GLSL @@ -567,11 +563,8 @@ private: class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset) - : program_code{program_code}, main_offset{main_offset} { - - Decode(); - } + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); + ~ShaderIR(); const std::map<u32, NodeBlock>& GetBasicBlocks() const { return basic_blocks; @@ -585,8 +578,7 @@ public: return used_predicates; } - const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>& - GetInputAttributes() const { + const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { return used_input_attributes; } @@ -615,6 +607,10 @@ public: return static_cast<std::size_t>(coverage_end * sizeof(u64)); } + bool HasPhysicalAttributes() const { + return uses_physical_attributes; + } + const Tegra::Shader::Header& GetHeader() const { return header; } @@ -696,8 +692,9 @@ private: /// Generates a predicate node for an immediate true or false value Node GetPredicate(bool immediate); /// Generates a node representing an input attribute. Keeps track of used attributes. 
- Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, - const Tegra::Shader::IpaMode& input_mode, Node buffer = {}); + Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); + /// Generates a node representing a physical input attribute. + Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); /// Generates a node representing an output attribute. Keeps track of used attributes. Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); /// Generates a node representing an internal flag @@ -814,11 +811,12 @@ private: void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, Node op_c, Node imm_lut, bool sets_cc); - Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); + Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); + std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; - std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); + std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, + s64 cursor) const; std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, Node addr_register, @@ -835,12 +833,10 @@ private: return StoreNode(OperationNode(code, std::move(meta), operands...)); } - template <typename... T> Node Operation(OperationCode code, std::vector<Node>&& operands) { return StoreNode(OperationNode(code, std::move(operands))); } - template <typename... T> Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); } @@ -872,13 +868,13 @@ private: std::set<u32> used_registers; std::set<Tegra::Shader::Pred> used_predicates; - std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>> - used_input_attributes; + std::set<Tegra::Shader::Attribute::Index> used_input_attributes; std::set<Tegra::Shader::Attribute::Index> used_output_attributes; std::map<u32, ConstBuffer> used_cbufs; std::set<Sampler> used_samplers; std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; + bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes Tegra::Shader::Header header; }; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 4505667ff..19ede1eb9 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -17,22 +17,24 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, for (; cursor >= 0; --cursor) { const Node node = code.at(cursor); if (const auto operation = std::get_if<OperationNode>(node)) { - if (operation->GetCode() == operation_code) + if (operation->GetCode() == operation_code) { return {node, cursor}; + } } if (const auto conditional = std::get_if<ConditionalNode>(node)) { const auto& conditional_code = conditional->GetCode(); const auto [found, internal_cursor] = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); - if (found) + if (found) { return {found, cursor}; + } } } return {}; } } // namespace -Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { +Node ShaderIR::TrackCbuf(Node tracked, const 
NodeBlock& code, s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
         // Cbuf found, but it has to be immediate
         return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
@@ -65,7 +67,7 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
     return nullptr;
 }
 
-std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
+std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
     // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
     // that it uses as operand
     const auto [found, found_cursor] =
@@ -80,7 +82,7 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
 }
 
 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
-                                             s64 cursor) {
+                                             s64 cursor) const {
     for (; cursor >= 0; --cursor) {
         const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
         if (!found_node) {
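
A note on the physical-attribute path introduced above: AL2P materializes an attribute address at runtime (gpr8 plus an immediate), and LD_A/IPA can later read through that address, which is why the IR sets uses_physical_attributes so a backend can declare every possible input ahead of time. The following stand-alone C++ sketch (not yuzu code) illustrates the addressing scheme being emulated; the 16-bytes-per-attribute stride, the 0x80 base offset for generic attributes, and the function names here are all assumptions of the sketch:

#include <cstdint>
#include <cstdio>

// AL2P writes gpr8 + immediate into its destination register; subsequent
// physical reads (LD_A with PHYS, or IPA in idx mode) consume that value.
std::uint32_t Al2pFakeAddress(std::uint32_t reg_value, std::uint32_t immediate) {
    return reg_value + immediate;
}

struct AttributeLocation {
    std::uint32_t index;   // generic attribute slot (Attribute_0 + index)
    std::uint32_t element; // 32-bit component within the slot (0..3)
};

// Decompose a physical address back into (attribute, element), assuming each
// attribute occupies 16 bytes (four 32-bit components) and that generic
// attributes begin at byte offset 0x80 -- both are assumptions of this sketch.
AttributeLocation LocatePhysicalRead(std::uint32_t address) {
    constexpr std::uint32_t generic_base = 0x80;
    const std::uint32_t offset = address - generic_base;
    return {offset / 16, (offset % 16) / 4};
}

int main() {
    // E.g. AL2P with r8 = 8 and immediate 0x88 yields address 0x90,
    // which lands on generic attribute 1, element 0.
    const std::uint32_t address = Al2pFakeAddress(8, 0x88);
    const AttributeLocation loc = LocatePhysicalRead(address);
    std::printf("attribute_%u.%u\n", static_cast<unsigned>(loc.index),
                static_cast<unsigned>(loc.element));
    return 0;
}

Since the address is generally only known at runtime, a backend that sees AbufNode::IsPhysicalBuffer() cannot pick a single attribute statically; lowering the access as a dynamic index (or a switch) over all declared generic attributes is one way to honor the flag, which is what declaring "all possible attributes and varyings" makes possible.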