From 0cfb0bacb2581d79631f496afbc3a3d5dd19eb42 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Thu, 6 Sep 2018 15:48:08 +0200 Subject: video_core: Move command buffer loop. This moves the hot loop into video_core. This refactoring shall reduce the CPU overhead of calling ProcessCommandList. --- src/video_core/command_processor.cpp | 97 +++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 45 deletions(-) (limited to 'src/video_core/command_processor.cpp') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d5831e752..e0c277105 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -69,57 +69,64 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) } } -void GPU::ProcessCommandList(GPUVAddr address, u32 size) { - const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); - VAddr current_addr = *head_address; - while (current_addr < *head_address + size * sizeof(CommandHeader)) { - const CommandHeader header = {Memory::Read32(current_addr)}; - current_addr += sizeof(u32); +MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); - switch (header.mode.Value()) { - case SubmissionMode::IncreasingOld: - case SubmissionMode::Increasing: { - // Increase the method value with each argument.
- for (unsigned i = 0; i < header.arg_count; ++i) { - WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); - current_addr += sizeof(u32); +void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { + MICROPROFILE_SCOPE(ProcessCommandLists); + for (auto entry : commands) { + Tegra::GPUVAddr address = entry.Address(); + u32 size = entry.sz; + const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); + VAddr current_addr = *head_address; + while (current_addr < *head_address + size * sizeof(CommandHeader)) { + const CommandHeader header = {Memory::Read32(current_addr)}; + current_addr += sizeof(u32); + + switch (header.mode.Value()) { + case SubmissionMode::IncreasingOld: + case SubmissionMode::Increasing: { + // Increase the method value with each argument. + for (unsigned i = 0; i < header.arg_count; ++i) { + WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), + header.arg_count - i - 1); + current_addr += sizeof(u32); + } + break; } - break; - } - case SubmissionMode::NonIncreasingOld: - case SubmissionMode::NonIncreasing: { - // Use the same method value for all arguments. - for (unsigned i = 0; i < header.arg_count; ++i) { - WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); - current_addr += sizeof(u32); + case SubmissionMode::NonIncreasingOld: + case SubmissionMode::NonIncreasing: { + // Use the same method value for all arguments. + for (unsigned i = 0; i < header.arg_count; ++i) { + WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), + header.arg_count - i - 1); + current_addr += sizeof(u32); + } + break; } - break; - } - case SubmissionMode::IncreaseOnce: { - ASSERT(header.arg_count.Value() >= 1); + case SubmissionMode::IncreaseOnce: { + ASSERT(header.arg_count.Value() >= 1); - // Use the original method for the first argument and then the next method for all other - // arguments.
- WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), - header.arg_count - 1); - current_addr += sizeof(u32); - - for (unsigned i = 1; i < header.arg_count; ++i) { - WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr), - header.arg_count - i - 1); + // Use the original method for the first argument and then the next method for all + // other arguments. + WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), + header.arg_count - 1); current_addr += sizeof(u32); + + for (unsigned i = 1; i < header.arg_count; ++i) { + WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr), + header.arg_count - i - 1); + current_addr += sizeof(u32); + } + break; + } + case SubmissionMode::Inline: { + // The register value is stored in the bits 16-28 as an immediate + WriteReg(header.method, header.subchannel, header.inline_data, 0); + break; + } + default: + UNIMPLEMENTED(); } - break; - } - case SubmissionMode::Inline: { - // The register value is stored in the bits 16-28 as an immediate - WriteReg(header.method, header.subchannel, header.inline_data, 0); - break; - } - default: - UNIMPLEMENTED(); } } } -- cgit v1.2.3 From c1b8cd90589141feb182da0d48c335bd624a4793 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Thu, 6 Sep 2018 17:02:46 +0200 Subject: video_core: Refactor command_processor. Inline the WriteReg helper as it is called ~20k times per frame. 
--- src/video_core/command_processor.cpp | 83 ++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 41 deletions(-) (limited to 'src/video_core/command_processor.cpp') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index e0c277105..2625ddfdc 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -28,51 +28,52 @@ enum class BufferMethods { CountBufferMethods = 0x40, }; -void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) { - LOG_TRACE(HW_GPU, - "Processing method {:08X} on subchannel {} value " - "{:08X} remaining params {}", - method, subchannel, value, remaining_params); - - ASSERT(subchannel < bound_engines.size()); - - if (method == static_cast<u32>(BufferMethods::BindObject)) { - // Bind the current subchannel to the desired engine id. - LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value); - bound_engines[subchannel] = static_cast<EngineID>(value); - return; - } - - if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) { - // TODO(Subv): Research and implement these methods.
- LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); - return; - } - - const EngineID engine = bound_engines[subchannel]; - - switch (engine) { - case EngineID::FERMI_TWOD_A: - fermi_2d->WriteReg(method, value); - break; - case EngineID::MAXWELL_B: - maxwell_3d->WriteReg(method, value, remaining_params); - break; - case EngineID::MAXWELL_COMPUTE_B: - maxwell_compute->WriteReg(method, value); - break; - case EngineID::MAXWELL_DMA_COPY_A: - maxwell_dma->WriteReg(method, value); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented engine"); - } -} - MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { MICROPROFILE_SCOPE(ProcessCommandLists); + + auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) { + LOG_TRACE(HW_GPU, + "Processing method {:08X} on subchannel {} value " + "{:08X} remaining params {}", + method, subchannel, value, remaining_params); + + ASSERT(subchannel < bound_engines.size()); + + if (method == static_cast<u32>(BufferMethods::BindObject)) { + // Bind the current subchannel to the desired engine id. + LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value); + bound_engines[subchannel] = static_cast<EngineID>(value); + return; + } + + if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) { + // TODO(Subv): Research and implement these methods.
+ LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); + return; + } + + const EngineID engine = bound_engines[subchannel]; + + switch (engine) { + case EngineID::FERMI_TWOD_A: + fermi_2d->WriteReg(method, value); + break; + case EngineID::MAXWELL_B: + maxwell_3d->WriteReg(method, value, remaining_params); + break; + case EngineID::MAXWELL_COMPUTE_B: + maxwell_compute->WriteReg(method, value); + break; + case EngineID::MAXWELL_DMA_COPY_A: + maxwell_dma->WriteReg(method, value); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented engine"); + } + }; + for (auto entry : commands) { Tegra::GPUVAddr address = entry.Address(); u32 size = entry.sz; -- cgit v1.2.3