diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/command_processor.cpp | 236 |
1 files changed, 147 insertions, 89 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index b46fadd9f..43ae06181 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -6,18 +6,20 @@ #include "common/profiler.h" +#include "core/hle/service/gsp_gpu.h" +#include "core/hw/gpu.h" +#include "core/settings.h" + +#include "debug_utils/debug_utils.h" + #include "clipper.h" #include "command_processor.h" #include "math.h" #include "pica.h" #include "primitive_assembly.h" +#include "renderer_base.h" #include "vertex_shader.h" #include "video_core.h" -#include "core/hle/service/gsp_gpu.h" -#include "core/hw/gpu.h" -#include "core/settings.h" - -#include "debug_utils/debug_utils.h" namespace Pica { @@ -43,12 +45,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq)) return; - // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value + // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value u32 old_value = regs[id]; regs[id] = (old_value & ~mask) | (value & mask); if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::CommandLoaded, reinterpret_cast<void*>(&id)); + g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); DebugUtils::OnPicaRegWrite(id, regs[id]); @@ -58,10 +60,50 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); break; + // Load default vertex input attributes + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): + { + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + default_attr_write_buffer[default_attr_counter++] = value; + + // Default attributes are written in a packed format such that four float24 values are encoded in + // three 32-bit numbers. We write to internal memory once a full such vector is + // written. + if (default_attr_counter >= 3) { + default_attr_counter = 0; + + auto& setup = regs.vs_default_attributes_setup; + + if (setup.index >= 16) { + LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); + break; + } + + Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; + + // NOTE: The destination component order indeed is "backwards" + attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); + + LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, + attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), + attribute.w.ToFloat32()); + + // TODO: Verify that this actually modifies the register! + setup.index = setup.index + 1; + } + break; + } + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): { - unsigned index = id - PICA_REG_INDEX(command_buffer.trigger[0]); + unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0])); u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); @@ -121,12 +163,55 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value()); PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value()); + if (g_debug_context) { + for (int i = 0; i < 3; ++i) { + const auto texture = regs.GetTextures()[i]; + if (!texture.enabled) + continue; + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + if (g_debug_context && Pica::g_debug_context->recorder) + g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress()); + } + } + + class { + /// Combine overlapping and close ranges + void SimplifyRanges() { + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too + auto it2 = std::next(it); + while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { + it->second = std::max(it->second, it2->first + it2->second - it->first); + it2 = ranges.erase(it2); + } + } + } + + public: + /// Record a particular memory access in the list + void AddAccess(u32 paddr, u32 size) { + // Create new range or extend existing one + ranges[paddr] = std::max(ranges[paddr], size); + + // Simplify ranges... + SimplifyRanges(); + } + + /// Map of accessed ranges (mapping start address to range size) + std::map<u32, u32> ranges; + } memory_accesses; + for (unsigned int index = 0; index < regs.num_vertices; ++index) { unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; if (is_indexed) { // TODO: Implement some sort of vertex cache! + if (g_debug_context && Pica::g_debug_context->recorder) { + int size = index_u16 ? 2 : 1; + memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); + } } // Initialize data for the current vertex @@ -149,7 +234,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Load per-vertex data from the loader arrays for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]); + u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; + const u8* srcdata = Memory::GetPhysicalPointer(source_addr); + + if (g_debug_context && Pica::g_debug_context->recorder) { + memory_accesses.AddAccess(source_addr, + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); + } const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : @@ -190,7 +282,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { &geometry_dumper, _1, _2, _3)); // Send to vertex shader - VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); + VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); if (is_indexed) { // TODO: Add processed vertex to vertex cache! @@ -211,47 +303,53 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } } + for (auto& range : memory_accesses.ranges) { + g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), + range.second, range.first); + } + if (Settings::values.use_hw_renderer) { VideoCore::g_renderer->hw_rasterizer->DrawTriangles(); } geometry_dumper.Dump(); - if (g_debug_context) + if (g_debug_context) { g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } break; } - case PICA_REG_INDEX(vs_bool_uniforms): + case PICA_REG_INDEX(vs.bool_uniforms): for (unsigned i = 0; i < 16; ++i) - g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0; + g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; break; - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { - int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); - auto values = regs.vs_int_uniforms[index]; + int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); + auto values = regs.vs.int_uniforms[index]; g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); break; } - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { - auto& uniform_setup = regs.vs_uniform_setup; + auto& uniform_setup = regs.vs.uniform_setup; // TODO: Does actual hardware indeed keep an intermediate buffer or does // it directly write the values? @@ -293,73 +391,33 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { break; } - // Load default vertex input attributes - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): - { - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - default_attr_write_buffer[default_attr_counter++] = value; - - // Default attributes are written in a packed format such that four float24 values are encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if (default_attr_counter >= 3) { - default_attr_counter = 0; - - auto& setup = regs.vs_default_attributes_setup; - - if (setup.index >= 16) { - LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); - break; - } - - Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; - - // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); - - LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, - attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), - attribute.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! - setup.index = setup.index + 1; - } - break; - } - // Load shader program code - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { - g_state.vs.program_code[regs.vs_program.offset] = value; - regs.vs_program.offset++; + g_state.vs.program_code[regs.vs.program.offset] = value; + regs.vs.program.offset++; break; } // Load swizzle pattern data - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { - g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value; - regs.vs_swizzle_patterns.offset++; + g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; + regs.vs.swizzle_patterns.offset++; break; } @@ -370,7 +428,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { VideoCore::g_renderer->hw_rasterizer->NotifyPicaRegisterChanged(id); if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id)); + g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id)); } void ProcessCommandList(const u32* list, u32 size) { |