diff options
author | David Marcec <dmarcecguzman@gmail.com> | 2018-04-26 23:28:54 +0200 |
---|---|---|
committer | David Marcec <dmarcecguzman@gmail.com> | 2018-04-26 23:28:54 +0200 |
commit | 7391741a204d6f25a06132eda214b2199b60a084 (patch) | |
tree | aeeb723744c4563ad608361b82dd938b062a3e09 /src/video_core/engines | |
parent | Added PREPO to logging backend, Removed comments from SaveReportWithUser (diff) | |
parent | Merge pull request #403 from lioncash/common (diff) | |
download | yuzu-7391741a204d6f25a06132eda214b2199b60a084.tar yuzu-7391741a204d6f25a06132eda214b2199b60a084.tar.gz yuzu-7391741a204d6f25a06132eda214b2199b60a084.tar.bz2 yuzu-7391741a204d6f25a06132eda214b2199b60a084.tar.lz yuzu-7391741a204d6f25a06132eda214b2199b60a084.tar.xz yuzu-7391741a204d6f25a06132eda214b2199b60a084.tar.zst yuzu-7391741a204d6f25a06132eda214b2199b60a084.zip |
Diffstat (limited to '')
-rw-r--r-- | src/video_core/engines/fermi_2d.cpp | 61 | ||||
-rw-r--r-- | src/video_core/engines/fermi_2d.h | 89 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 71 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 54 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 43 |
5 files changed, 281 insertions, 37 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 7aab163dc..9019f2504 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -2,12 +2,71 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "core/memory.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/textures/decoders.h" namespace Tegra { namespace Engines { -void Fermi2D::WriteReg(u32 method, u32 value) {} +Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {} + +void Fermi2D::WriteReg(u32 method, u32 value) { + ASSERT_MSG(method < Regs::NUM_REGS, + "Invalid Fermi2D register, increase the size of the Regs structure"); + + regs.reg_array[method] = value; + + switch (method) { + case FERMI2D_REG_INDEX(trigger): { + HandleSurfaceCopy(); + break; + } + } +} + +void Fermi2D::HandleSurfaceCopy() { + NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}", + static_cast<u32>(regs.operation)); + + const GPUVAddr source = regs.src.Address(); + const GPUVAddr dest = regs.dst.Address(); + + // TODO(Subv): Only same-format and same-size copies are allowed for now. + ASSERT(regs.src.format == regs.dst.format); + ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height); + + // TODO(Subv): Only raw copies are implemented. + ASSERT(regs.operation == Regs::Operation::SrcCopy); + + const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source); + const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest); + + u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); + u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); + + if (regs.src.linear == regs.dst.linear) { + // If the input layout and the output layout are the same, just perform a raw copy. + Memory::CopyBlock(dest_cpu, source_cpu, + src_bytes_per_pixel * regs.dst.width * regs.dst.height); + return; + } + + u8* src_buffer = Memory::GetPointer(source_cpu); + u8* dst_buffer = Memory::GetPointer(dest_cpu); + + if (!regs.src.linear && regs.dst.linear) { + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, src_buffer, dst_buffer, true, + regs.src.block_height); + } else { + // If the input is linear and the output is tiled, swizzle the input and copy it over. + Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, + dst_bytes_per_pixel, dst_buffer, src_buffer, false, + regs.dst.block_height); + } +} } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 8967ddede..0c5b413cc 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -4,19 +4,106 @@ #pragma once +#include <array> +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" namespace Tegra { namespace Engines { +#define FERMI2D_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) + class Fermi2D final { public: - Fermi2D() = default; + explicit Fermi2D(MemoryManager& memory_manager); ~Fermi2D() = default; /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value); + + struct Regs { + static constexpr size_t NUM_REGS = 0x258; + + struct Surface { + RenderTargetFormat format; + BitField<0, 1, u32> linear; + union { + BitField<0, 4, u32> block_depth; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_width; + }; + u32 depth; + u32 layer; + u32 pitch; + u32 width; + u32 height; + u32 address_high; + u32 address_low; + + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + }; + static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); + + enum class Operation : u32 { + SrcCopyAnd = 0, + ROPAnd = 1, + Blend = 2, + SrcCopy = 3, + ROP = 4, + SrcCopyPremult = 5, + BlendPremult = 6, + }; + + union { + struct { + INSERT_PADDING_WORDS(0x80); + + Surface dst; + + INSERT_PADDING_WORDS(2); + + Surface src; + + INSERT_PADDING_WORDS(0x15); + + Operation operation; + + INSERT_PADDING_WORDS(0x9); + + // TODO(Subv): This is only a guess. + u32 trigger; + + INSERT_PADDING_WORDS(0x1A3); + }; + std::array<u32, NUM_REGS> reg_array; + }; + } regs{}; + + MemoryManager& memory_manager; + +private: + /// Performs the copy from the source surface to the destination surface as configured in the + /// registers. + void HandleSurfaceCopy(); }; +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(dst, 0x80); +ASSERT_REG_POSITION(src, 0x8C); +ASSERT_REG_POSITION(operation, 0xAB); +ASSERT_REG_POSITION(trigger, 0xB5); +#undef ASSERT_REG_POSITION + } // namespace Engines } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2a3ff234a..4306b894f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -22,10 +22,6 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(MemoryManager& memory_manager) : memory_manager(memory_manager), macro_interpreter(*this) {} -void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) { - uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code); -} - void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { auto macro_code = uploaded_macros.find(method); // The requested macro must have been uploaded already. @@ -37,9 +33,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid Maxwell3D register, increase the size of the Regs structure"); - auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); // It is an error to write to a register other than the current macro's ARG register before it @@ -68,6 +61,9 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { return; } + ASSERT_MSG(method < Regs::NUM_REGS, + "Invalid Maxwell3D register, increase the size of the Regs structure"); + if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } @@ -75,6 +71,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { regs.reg_array[method] = value; switch (method) { + case MAXWELL3D_REG_INDEX(macros.data): { + ProcessMacroUpload(value); + break; + } case MAXWELL3D_REG_INDEX(code_address.code_address_high): case MAXWELL3D_REG_INDEX(code_address.code_address_low): { // Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS @@ -141,17 +141,48 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { } } +void Maxwell3D::ProcessMacroUpload(u32 data) { + // Store the uploaded macro code to interpret them when they're called. + auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; + macro.push_back(data); +} + void Maxwell3D::ProcessQueryGet() { GPUVAddr sequence_address = regs.query.QueryAddress(); // Since the sequence address is given as a GPU VAddr, we have to convert it to an application // VAddr before writing. - VAddr address = memory_manager.PhysicalToVirtualAddress(sequence_address); + boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address); + + // TODO(Subv): Support the other query units. + ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, + "Units other than CROP are unimplemented"); + ASSERT_MSG(regs.query.query_get.short_query, + "Writing the entire query result structure is unimplemented"); + + u32 value = Memory::Read32(*address); + u32 result = 0; + + // TODO(Subv): Support the other query variables + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + result = 0; + break; + default: + UNIMPLEMENTED_MSG("Unimplemented query select type %u", + static_cast<u32>(regs.query.query_get.select.Value())); + } + + // TODO(Subv): Research and implement how query sync conditions work. switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: { + case Regs::QueryMode::Write: + case Regs::QueryMode::Write2: { // Write the current query sequence to the sequence address. u32 sequence = regs.query.query_sequence; - Memory::Write32(address, sequence); + Memory::Write32(*address, sequence); + + // TODO(Subv): Write the proper query response structure to the address when not using short + // mode. break; } default: @@ -161,8 +192,8 @@ void Maxwell3D::ProcessQueryGet() { } void Maxwell3D::DrawArrays() { - LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(), - regs.vertex_buffer.count); + NGLOG_DEBUG(HW_GPU, "called, topology={}, count={}", + static_cast<u32>(regs.draw.topology.Value()), regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); @@ -200,10 +231,10 @@ void Maxwell3D::ProcessCBData(u32 value) { // Don't allow writing past the end of the buffer. ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); - VAddr address = - memory_manager.PhysicalToVirtualAddress(buffer_address + regs.const_buffer.cb_pos); + boost::optional<VAddr> address = + memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); - Memory::Write32(address, value); + Memory::Write32(*address, value); // Increment the current buffer position. regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; @@ -213,10 +244,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { GPUVAddr tic_base_address = regs.tic.TICAddress(); GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); - VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu); + boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); Texture::TICEntry tic_entry; - Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); + Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || tic_entry.header_version == Texture::TICHeaderVersion::Pitch, @@ -243,10 +274,10 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); - VAddr tsc_address_cpu = memory_manager.PhysicalToVirtualAddress(tsc_address_gpu); + boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); Texture::TSCEntry tsc_entry; - Memory::ReadBlock(tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); + Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; } @@ -268,7 +299,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { Texture::TextureHandle tex_handle{ - Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))}; + Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))}; Texture::FullTextureInfo tex_info{}; // TODO(Subv): Use the shader to determine which textures are actually accessed. diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d4fcedace..5cf62fb01 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -31,7 +31,7 @@ public: /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. struct Regs { - static constexpr size_t NUM_REGS = 0xE36; + static constexpr size_t NUM_REGS = 0xE00; static constexpr size_t NumRenderTargets = 8; static constexpr size_t NumViewports = 16; @@ -46,6 +46,29 @@ public: enum class QueryMode : u32 { Write = 0, Sync = 1, + // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 + // is. + Write2 = 2, + }; + + enum class QueryUnit : u32 { + VFetch = 1, + VP = 2, + Rast = 4, + StrmOut = 5, + GP = 6, + ZCull = 7, + Prop = 10, + Crop = 15, + }; + + enum class QuerySelect : u32 { + Zero = 0, + }; + + enum class QuerySyncCondition : u32 { + NotEqual = 0, + GreaterThan = 1, }; enum class ShaderProgram : u32 { @@ -299,7 +322,15 @@ public: union { struct { - INSERT_PADDING_WORDS(0x200); + INSERT_PADDING_WORDS(0x45); + + struct { + INSERT_PADDING_WORDS(1); + u32 data; + u32 entry; + } macros; + + INSERT_PADDING_WORDS(0x1B8); struct { u32 address_high; @@ -476,7 +507,10 @@ public: u32 raw; BitField<0, 2, QueryMode> mode; BitField<4, 1, u32> fence; - BitField<12, 4, u32> unit; + BitField<12, 4, QueryUnit> unit; + BitField<16, 1, QuerySyncCondition> sync_cond; + BitField<23, 5, QuerySelect> select; + BitField<28, 1, u32> short_query; } query_get; GPUVAddr QueryAddress() const { @@ -500,6 +534,11 @@ public: return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) | start_low); } + + bool IsEnabled() const { + return enable != 0 && StartAddress() != 0; + } + } vertex_array[NumVertexArrays]; Blend blend; @@ -574,7 +613,7 @@ public: u32 size[MaxShaderStage]; } tex_info_buffers; - INSERT_PADDING_WORDS(0x102); + INSERT_PADDING_WORDS(0xCC); }; std::array<u32, NUM_REGS> reg_array; }; @@ -606,9 +645,6 @@ public: /// Write the value to the register identified by method. void WriteReg(u32 method, u32 value, u32 remaining_params); - /// Uploads the code for a GPU macro program associated with the specified entry. - void SubmitMacroCode(u32 entry, std::vector<u32> code); - /// Returns a list of enabled textures for the specified shader stage. std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; @@ -639,6 +675,9 @@ private: */ void CallMacroMethod(u32 method, std::vector<u32> parameters); + /// Handles writes to the macro uploading registers. + void ProcessMacroUpload(u32 data); + /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); @@ -656,6 +695,7 @@ private: static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +ASSERT_REG_POSITION(macros, 0x45); ASSERT_REG_POSITION(rt, 0x200); ASSERT_REG_POSITION(viewport_transform[0], 0x280); ASSERT_REG_POSITION(viewport, 0x300); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5a006aee5..f4d11fa5d 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -214,6 +214,20 @@ union Instruction { BitField<56, 1, u64> neg_b; } fsetp; + union { + BitField<39, 3, u64> pred39; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> neg_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, PredOperation> op; + BitField<48, 4, PredCondition> cond; + BitField<53, 1, u64> neg_b; + BitField<54, 1, u64> abs_a; + BitField<52, 1, u64> bf; + BitField<55, 1, u64> ftz; + BitField<56, 1, u64> neg_imm; + } fset; + BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; @@ -261,6 +275,9 @@ public: I2F_C, I2F_R, I2F_IMM, + I2I_C, + I2I_R, + I2I_IMM, LOP32I, MOV_C, MOV_R, @@ -272,6 +289,9 @@ public: FSETP_C, // Set Predicate FSETP_R, FSETP_IMM, + FSET_C, + FSET_R, + FSET_IMM, ISETP_C, ISETP_IMM, ISETP_R, @@ -283,8 +303,9 @@ public: Ffma, Flow, Memory, - FloatPredicate, - IntegerPredicate, + FloatSet, + FloatSetPredicate, + IntegerSetPredicate, Unknown, }; @@ -409,6 +430,9 @@ private: INST("0100110010111---", Id::I2F_C, Type::Arithmetic, "I2F_C"), INST("0101110010111---", Id::I2F_R, Type::Arithmetic, "I2F_R"), INST("0011100-10111---", Id::I2F_IMM, Type::Arithmetic, "I2F_IMM"), + INST("0100110011100---", Id::I2I_C, Type::Arithmetic, "I2I_C"), + INST("0101110011100---", Id::I2I_R, Type::Arithmetic, "I2I_R"), + INST("01110001-1000---", Id::I2I_IMM, Type::Arithmetic, "I2I_IMM"), INST("000001----------", Id::LOP32I, Type::Arithmetic, "LOP32I"), INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), @@ -417,12 +441,15 @@ private: INST("0100110000101---", Id::SHR_C, Type::Arithmetic, "SHR_C"), INST("0101110000101---", Id::SHR_R, Type::Arithmetic, "SHR_R"), INST("0011100-00101---", Id::SHR_IMM, Type::Arithmetic, "SHR_IMM"), - INST("010010111011----", Id::FSETP_C, Type::FloatPredicate, "FSETP_C"), - INST("010110111011----", Id::FSETP_R, Type::FloatPredicate, "FSETP_R"), - INST("0011011-1011----", Id::FSETP_IMM, Type::FloatPredicate, "FSETP_IMM"), - INST("010010110110----", Id::ISETP_C, Type::IntegerPredicate, "ISETP_C"), - INST("010110110110----", Id::ISETP_R, Type::IntegerPredicate, "ISETP_R"), - INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerPredicate, "ISETP_IMM"), + INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"), + INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"), + INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"), + INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"), + INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"), + INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"), + INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"), + INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"), + INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"), }; #undef INST std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { |