diff options
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- | src/video_core/engines/engine_interface.h | 8 | ||||
-rw-r--r-- | src/video_core/engines/engine_upload.h | 8 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 20 | ||||
-rw-r--r-- | src/video_core/engines/kepler_compute.h | 17 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 6 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 6 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_dma.h | 55 | ||||
-rw-r--r-- | src/video_core/engines/puller.cpp | 15 |
8 files changed, 100 insertions, 35 deletions
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 392322358..54631ee6c 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -11,6 +11,14 @@ namespace Tegra::Engines { +enum class EngineTypes : u32 { + KeplerCompute, + Maxwell3D, + Fermi2D, + MaxwellDMA, + KeplerMemory, +}; + class EngineInterface { public: virtual ~EngineInterface() = default; diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 7242d2529..21bf8aeb4 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -69,6 +69,14 @@ public: /// Binds a rasterizer to this engine. void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + GPUVAddr ExecTargetAddress() const { + return regs.dest.Address(); + } + + u32 GetUploadSize() const { + return copy_size; + } + private: void ProcessData(std::span<const u8> read_buffer); diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a38d9528a..cd61ab222 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal switch (method) { case KEPLER_COMPUTE_REG_INDEX(exec_upload): { + UploadInfo info{.upload_address = upload_address, + .exec_address = upload_state.ExecTargetAddress(), + .copy_size = upload_state.GetUploadSize()}; + uploads.push_back(info); upload_state.ProcessExec(regs.exec_upload.linear != 0); break; } case KEPLER_COMPUTE_REG_INDEX(data_upload): { + upload_address = current_dma_segment; upload_state.ProcessData(method_argument, is_last_call); break; } - case KEPLER_COMPUTE_REG_INDEX(launch): + case KEPLER_COMPUTE_REG_INDEX(launch): { + const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); + + for (auto& data : uploads) { + const GPUVAddr offset = data.exec_address - launch_desc_loc; + if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) && + memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) { + indirect_compute = {data.upload_address}; + } + } + uploads.clear(); ProcessLaunch(); + indirect_compute = std::nullopt; break; + } default: break; } @@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun u32 methods_pending) { switch (method) { case KEPLER_COMPUTE_REG_INDEX(data_upload): + upload_address = current_dma_segment; upload_state.ProcessData(base_start, amount); return; default: diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 2092e685f..735e05fb4 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -5,6 +5,7 @@ #include <array> #include <cstddef> +#include <optional> #include <vector> #include "common/bit_field.h" #include "common/common_funcs.h" @@ -36,6 +37,9 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) +#define LAUNCH_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32)) + class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); @@ -201,6 +205,10 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; + std::optional<GPUVAddr> GetIndirectComputeAddress() const { + return indirect_compute; + } + private: void ProcessLaunch(); @@ -216,6 +224,15 @@ private: MemoryManager& memory_manager; VideoCore::RasterizerInterface* rasterizer = nullptr; Upload::State upload_state; + GPUVAddr upload_address; + + struct UploadInfo { + GPUVAddr upload_address; + GPUVAddr exec_address; + u32 copy_size; + }; + std::vector<UploadInfo> uploads; + std::optional<GPUVAddr> indirect_compute{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c3696096d..06e349e43 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -257,6 +257,7 @@ u32 Maxwell3D::GetMaxCurrentVertices() { const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); num_vertices = std::max( num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value())); + break; } return num_vertices; } @@ -269,10 +270,13 @@ size_t Maxwell3D::EstimateIndexBufferSize() { std::numeric_limits<u32>::max()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); const size_t log2_byte_size = Common::Log2Ceil64(byte_size); + const size_t cap{GetMaxCurrentVertices() * 3 * byte_size}; + const size_t lower_cap = + std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); return std::min<size_t>( memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) / byte_size, - static_cast<size_t>(end_address - start_address)); + lower_cap); } u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index cd8e24b0b..279f0daa1 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/polyfill_ranges.h" #include "common/settings.h" #include "core/core.h" #include "core/memory.h" @@ -108,10 +109,11 @@ void MaxwellDMA::Launch() { const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); + accelerate.BufferClear(regs.offset_out, regs.line_length_in, + regs.remap_const.remap_consta_value); read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); - std::ranges::fill(span, regs.remap_consta_value); + std::ranges::fill(span, regs.remap_const.remap_consta_value); memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(read_buffer.data()), regs.line_length_in * sizeof(u32)); diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 69e26cb32..1a43e24b6 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -214,14 +214,15 @@ public: NO_WRITE = 6, }; - PackedGPUVAddr address; + u32 remap_consta_value; + u32 remap_constb_value; union { + BitField<0, 12, u32> dst_components_raw; BitField<0, 3, Swizzle> dst_x; BitField<4, 3, Swizzle> dst_y; BitField<8, 3, Swizzle> dst_z; BitField<12, 3, Swizzle> dst_w; - BitField<0, 12, u32> dst_components_raw; BitField<16, 2, u32> component_size_minus_one; BitField<20, 2, u32> num_src_components_minus_one; BitField<24, 2, u32> num_dst_components_minus_one; @@ -274,55 +275,57 @@ private: struct Regs { union { struct { - u32 reserved[0x40]; + INSERT_PADDING_BYTES_NOINIT(0x100); u32 nop; - u32 reserved01[0xf]; + INSERT_PADDING_BYTES_NOINIT(0x3C); u32 pm_trigger; - u32 reserved02[0x3f]; + INSERT_PADDING_BYTES_NOINIT(0xFC); Semaphore semaphore; - u32 reserved03[0x2]; + INSERT_PADDING_BYTES_NOINIT(0x8); RenderEnable render_enable; PhysMode src_phys_mode; PhysMode dst_phys_mode; - u32 reserved04[0x26]; + INSERT_PADDING_BYTES_NOINIT(0x98); LaunchDMA launch_dma; - u32 reserved05[0x3f]; + INSERT_PADDING_BYTES_NOINIT(0xFC); PackedGPUVAddr offset_in; PackedGPUVAddr offset_out; s32 pitch_in; s32 pitch_out; u32 line_length_in; u32 line_count; - u32 reserved06[0xb6]; - u32 remap_consta_value; - u32 remap_constb_value; + INSERT_PADDING_BYTES_NOINIT(0x2E0); RemapConst remap_const; DMA::Parameters dst_params; - u32 reserved07[0x1]; + INSERT_PADDING_BYTES_NOINIT(0x4); DMA::Parameters src_params; - u32 reserved08[0x275]; + INSERT_PADDING_BYTES_NOINIT(0x9D4); u32 pm_trigger_end; - u32 reserved09[0x3ba]; + INSERT_PADDING_BYTES_NOINIT(0xEE8); }; std::array<u32, NUM_REGS> reg_array; }; } regs{}; + static_assert(sizeof(Regs) == NUM_REGS * 4); #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \ + static_assert(offsetof(MaxwellDMA::Regs, field_name) == position, \ "Field " #field_name " has invalid position") - ASSERT_REG_POSITION(launch_dma, 0xC0); - ASSERT_REG_POSITION(offset_in, 0x100); - ASSERT_REG_POSITION(offset_out, 0x102); - ASSERT_REG_POSITION(pitch_in, 0x104); - ASSERT_REG_POSITION(pitch_out, 0x105); - ASSERT_REG_POSITION(line_length_in, 0x106); - ASSERT_REG_POSITION(line_count, 0x107); - ASSERT_REG_POSITION(remap_const, 0x1C0); - ASSERT_REG_POSITION(dst_params, 0x1C3); - ASSERT_REG_POSITION(src_params, 0x1CA); - + ASSERT_REG_POSITION(semaphore, 0x240); + ASSERT_REG_POSITION(render_enable, 0x254); + ASSERT_REG_POSITION(src_phys_mode, 0x260); + ASSERT_REG_POSITION(launch_dma, 0x300); + ASSERT_REG_POSITION(offset_in, 0x400); + ASSERT_REG_POSITION(offset_out, 0x408); + ASSERT_REG_POSITION(pitch_in, 0x410); + ASSERT_REG_POSITION(pitch_out, 0x414); + ASSERT_REG_POSITION(line_length_in, 0x418); + ASSERT_REG_POSITION(line_count, 0x41C); + ASSERT_REG_POSITION(remap_const, 0x700); + ASSERT_REG_POSITION(dst_params, 0x70C); + ASSERT_REG_POSITION(src_params, 0x728); + ASSERT_REG_POSITION(pm_trigger_end, 0x1114); #undef ASSERT_REG_POSITION }; diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 7718a09b3..6de2543b7 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: - dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel, + EngineTypes::Fermi2D); break; case EngineID::MAXWELL_B: - dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel, + EngineTypes::Maxwell3D); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel, + EngineTypes::KeplerCompute); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel, + EngineTypes::MaxwellDMA); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel, + EngineTypes::KeplerMemory); break; default: UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); |