From 0cf15f64efe0c2dff67d27aac41def4dbcf44262 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 18:07:34 +0200 Subject: Remove late accesses to attribute_config --- src/video_core/command_processor.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/video_core/command_processor.cpp') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 3abe79c09..8030d17a7 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -198,6 +198,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { const auto& attribute_config = regs.vertex_attributes; const u32 base_address = attribute_config.GetPhysicalBaseAddress(); + int num_total_attributes = attribute_config.GetNumTotalAttributes(); // Information about internal vertex attributes u32 vertex_attribute_sources[16]; @@ -207,7 +208,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { u32 vertex_attribute_elements[16] = {}; u32 vertex_attribute_element_size[16] = {}; - + bool vertex_attribute_default[16] = {}; // Setup attribute data from loaders for (int loader = 0; loader < 12; ++loader) { const auto& loader_config = attribute_config.attribute_loaders[loader]; @@ -230,6 +231,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); vertex_attribute_element_size[attribute_index] = element_size; + vertex_attribute_default[attribute_index] = attribute_config.IsDefaultAttribute(attribute_index); offset += attribute_config.GetStride(attribute_index); } else if (attribute_index < 16) { // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively @@ -333,7 +335,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Initialize data for the current vertex Shader::InputVertex input; - for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { + for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { // Default attribute values set if array elements have < 4 components. This // is *not* carried over from the default attribute settings even if they're @@ -362,12 +364,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { input.attr[i][comp] = float24::FromFloat32(srcval); LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", comp, i, vertex, index, - attribute_config.GetPhysicalBaseAddress(), + base_address, vertex_attribute_sources[i] - base_address, vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], input.attr[i][comp].ToFloat32()); } - } else if (attribute_config.IsDefaultAttribute(i)) { + } else if (vertex_attribute_default[i]) { // Load the default attribute if we're configured to do so input.attr[i] = g_state.vs.default_attributes[i]; LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", @@ -385,7 +387,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); // Send to vertex shader - output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); + output = Shader::Run(shader_unit, input, num_total_attributes); if (is_indexed) { vertex_cache[vertex_cache_pos] = output; -- cgit v1.2.3 From 47ff00881703eeab03d32e60289ac34b7f4a7994 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 19:01:47 +0200 Subject: Refactor: Extract VertexLoader from command_processor.cpp. Preparation for a similar concept to Dolphin or PPSSPP. These can be JIT-ed and cached. --- src/video_core/command_processor.cpp | 134 +++-------------------------------- 1 file changed, 10 insertions(+), 124 deletions(-) (limited to 'src/video_core/command_processor.cpp') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 8030d17a7..8c3e982a3 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -22,6 +22,7 @@ #include "video_core/video_core.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/shader/shader_interpreter.h" +#include "video_core/vertex_loader.h" namespace Pica { @@ -192,62 +193,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { #if PICA_LOG_TEV DebugUtils::DumpTevStageConfig(regs.GetTevStages()); #endif - if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - const auto& attribute_config = regs.vertex_attributes; - const u32 base_address = attribute_config.GetPhysicalBaseAddress(); - int num_total_attributes = attribute_config.GetNumTotalAttributes(); - - // Information about internal vertex attributes - u32 vertex_attribute_sources[16]; - boost::fill(vertex_attribute_sources, 0xdeadbeef); - u32 vertex_attribute_strides[16] = {}; - Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; - - u32 vertex_attribute_elements[16] = {}; - u32 vertex_attribute_element_size[16] = {}; - bool vertex_attribute_default[16] = {}; - // Setup attribute data from loaders - for (int loader = 0; loader < 12; ++loader) { - const auto& loader_config = attribute_config.attribute_loaders[loader]; - - u32 offset = 0; - - // TODO: What happens if a loader overwrites a previous one's data? - for (unsigned component = 0; component < loader_config.component_count; ++component) { - if (component >= 12) { - LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); - continue; - } - - u32 attribute_index = loader_config.GetComponent(component); - if (attribute_index < 12) { - int element_size = attribute_config.GetElementSizeInBytes(attribute_index); - offset = Common::AlignUp(offset, element_size); - vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset; - vertex_attribute_strides[attribute_index] = static_cast(loader_config.byte_count); - vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); - vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); - vertex_attribute_element_size[attribute_index] = element_size; - vertex_attribute_default[attribute_index] = attribute_config.IsDefaultAttribute(attribute_index); - offset += attribute_config.GetStride(attribute_index); - } else if (attribute_index < 16) { - // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively - offset = Common::AlignUp(offset, 4); - offset += (attribute_index - 11) * 4; - } else { - UNREACHABLE(); // This is truly unreachable due to the number of bits for each component - } - } - } + // Processes information about internal vertex attributes to figure out how a vertex is loaded. + // Later, these can be compiled and cached. + VertexLoader loader; + loader.Setup(regs); // Load vertices bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); const auto& index_info = regs.index_array; - const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); + const u8* index_address_8 = Memory::GetPhysicalPointer(loader.GetPhysicalBaseAddress() + index_info.offset); const u16* index_address_16 = reinterpret_cast(index_address_8); bool index_u16 = index_info.format != 0; @@ -265,32 +223,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } } - class { - /// Combine overlapping and close ranges - void SimplifyRanges() { - for (auto it = ranges.begin(); it != ranges.end(); ++it) { - // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too - auto it2 = std::next(it); - while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { - it->second = std::max(it->second, it2->first + it2->second - it->first); - it2 = ranges.erase(it2); - } - } - } - - public: - /// Record a particular memory access in the list - void AddAccess(u32 paddr, u32 size) { - // Create new range or extend existing one - ranges[paddr] = std::max(ranges[paddr], size); - - // Simplify ranges... - SimplifyRanges(); - } - - /// Map of accessed ranges (mapping start address to range size) - std::map ranges; - } memory_accesses; + MemoryAccesses memory_accesses; // Simple circular-replacement vertex cache // The size has been tuned for optimal balance between hit-rate and the cost of lookup @@ -319,7 +252,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (is_indexed) { if (g_debug_context && Pica::g_debug_context->recorder) { int size = index_u16 ? 2 : 1; - memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); + memory_accesses.AddAccess(loader.GetPhysicalBaseAddress() + index_info.offset + size * index, size); } for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { @@ -334,60 +267,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex Shader::InputVertex input; - - for (int i = 0; i < num_total_attributes; ++i) { - if (vertex_attribute_elements[i] != 0) { - // Default attribute values set if array elements have < 4 components. This - // is *not* carried over from the default attribute settings even if they're - // enabled for this attribute. - static const float24 zero = float24::FromFloat32(0.0f); - static const float24 one = float24::FromFloat32(1.0f); - input.attr[i] = Math::Vec4(zero, zero, zero, one); - - // Load per-vertex data from the loader arrays - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; - const u8* srcdata = Memory::GetPhysicalPointer(source_addr); - - if (g_debug_context && Pica::g_debug_context->recorder) { - memory_accesses.AddAccess(source_addr, - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 - : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); - } - - const float srcval = - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast(srcdata) : - *reinterpret_cast(srcdata); - - input.attr[i][comp] = float24::FromFloat32(srcval); - LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", - comp, i, vertex, index, - base_address, - vertex_attribute_sources[i] - base_address, - vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], - input.attr[i][comp].ToFloat32()); - } - } else if (vertex_attribute_default[i]) { - // Load the default attribute if we're configured to do so - input.attr[i] = g_state.vs.default_attributes[i]; - LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", - i, vertex, index, - input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), - input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); - } else { - // TODO(yuriks): In this case, no data gets loaded and the vertex - // remains with the last value it had. This isn't currently maintained - // as global state, however, and so won't work in Citra yet. - } - } + loader.LoadVertex(index, vertex, input, memory_accesses); if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); // Send to vertex shader - output = Shader::Run(shader_unit, input, num_total_attributes); + output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); if (is_indexed) { vertex_cache[vertex_cache_pos] = output; -- cgit v1.2.3 From 2403e86cbb5c49afeceb98f7c6e843da78dff415 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Thu, 28 Apr 2016 20:17:35 +0200 Subject: Don't keep base_address in the loader, it doesn't belong there (with it, the loader can't be cached). --- src/video_core/command_processor.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/video_core/command_processor.cpp') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 8c3e982a3..373fb51ad 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -199,13 +199,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Processes information about internal vertex attributes to figure out how a vertex is loaded. // Later, these can be compiled and cached. VertexLoader loader; + const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); loader.Setup(regs); // Load vertices bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); const auto& index_info = regs.index_array; - const u8* index_address_8 = Memory::GetPhysicalPointer(loader.GetPhysicalBaseAddress() + index_info.offset); + const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); const u16* index_address_16 = reinterpret_cast(index_address_8); bool index_u16 = index_info.format != 0; @@ -252,7 +253,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (is_indexed) { if (g_debug_context && Pica::g_debug_context->recorder) { int size = index_u16 ? 2 : 1; - memory_accesses.AddAccess(loader.GetPhysicalBaseAddress() + index_info.offset + size * index, size); + memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); } for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { @@ -267,7 +268,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex Shader::InputVertex input; - loader.LoadVertex(index, vertex, input, memory_accesses); + loader.LoadVertex(base_address, index, vertex, input, memory_accesses); if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); -- cgit v1.2.3 From a86d7cacc1f56c6e8ff5f046ba7e2477e92d873f Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 29 Apr 2016 08:50:21 +0200 Subject: Move and rename the MemoryAccesses class to MemoryAccessTracker. --- src/video_core/command_processor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/command_processor.cpp') diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 373fb51ad..f181f75d7 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -224,7 +224,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } } - MemoryAccesses memory_accesses; + DebugUtils::MemoryAccessTracker memory_accesses; // Simple circular-replacement vertex cache // The size has been tuned for optimal balance between hit-rate and the cost of lookup -- cgit v1.2.3