diff options
author | bunnei <bunneidev@gmail.com> | 2016-04-29 15:42:47 +0200 |
---|---|---|
committer | bunnei <bunneidev@gmail.com> | 2016-04-29 15:42:47 +0200 |
commit | 90243c56fb90d7d74cbef40da3eec97d967c10a2 (patch) | |
tree | 94d223001196ca9b774a8d018535ba2be8de1b01 | |
parent | Common: Remove section measurement from profiler (#1731) (diff) | |
parent | Move and rename the MemoryAccesses class to MemoryAccessTracker. (diff) | |
download | yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.gz yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.bz2 yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.lz yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.xz yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.zst yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.zip |
-rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 129 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 30 | ||||
-rw-r--r-- | src/video_core/shader/shader.h | 2 | ||||
-rw-r--r-- | src/video_core/vertex_loader.cpp | 140 | ||||
-rw-r--r-- | src/video_core/vertex_loader.h | 28 |
6 files changed, 210 insertions, 121 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 76cfd4f7d..de4082b1f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -16,6 +16,7 @@ set(SRCS shader/shader_interpreter.cpp swrasterizer.cpp utils.cpp + vertex_loader.cpp video_core.cpp ) @@ -43,6 +44,7 @@ set(HEADERS shader/shader_interpreter.h swrasterizer.h utils.h + vertex_loader.h video_core.h ) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 97ba8214e..58883e374 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -21,6 +21,7 @@ #include "video_core/video_core.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/shader/shader_interpreter.h" +#include "video_core/vertex_loader.h" namespace Pica { @@ -188,54 +189,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { #if PICA_LOG_TEV DebugUtils::DumpTevStageConfig(regs.GetTevStages()); #endif - if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - const auto& attribute_config = regs.vertex_attributes; - const u32 base_address = attribute_config.GetPhysicalBaseAddress(); - - // Information about internal vertex attributes - u32 vertex_attribute_sources[16]; - boost::fill(vertex_attribute_sources, 0xdeadbeef); - u32 vertex_attribute_strides[16] = {}; - Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; - - u32 vertex_attribute_elements[16] = {}; - u32 vertex_attribute_element_size[16] = {}; - - // Setup attribute data from loaders - for (int loader = 0; loader < 12; ++loader) { - const auto& loader_config = attribute_config.attribute_loaders[loader]; - - u32 offset = 0; - - // TODO: What happens if a loader overwrites a previous one's data? - for (unsigned component = 0; component < loader_config.component_count; ++component) { - if (component >= 12) { - LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); - continue; - } - - u32 attribute_index = loader_config.GetComponent(component); - if (attribute_index < 12) { - int element_size = attribute_config.GetElementSizeInBytes(attribute_index); - offset = Common::AlignUp(offset, element_size); - vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset; - vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); - vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); - vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); - vertex_attribute_element_size[attribute_index] = element_size; - offset += attribute_config.GetStride(attribute_index); - } else if (attribute_index < 16) { - // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively - offset = Common::AlignUp(offset, 4); - offset += (attribute_index - 11) * 4; - } else { - UNREACHABLE(); // This is truly unreachable due to the number of bits for each component - } - } - } + // Processes information about internal vertex attributes to figure out how a vertex is loaded. + // Later, these can be compiled and cached. + VertexLoader loader; + const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); + loader.Setup(regs); // Load vertices bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); @@ -259,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } } - class { - /// Combine overlapping and close ranges - void SimplifyRanges() { - for (auto it = ranges.begin(); it != ranges.end(); ++it) { - // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too - auto it2 = std::next(it); - while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { - it->second = std::max(it->second, it2->first + it2->second - it->first); - it2 = ranges.erase(it2); - } - } - } - - public: - /// Record a particular memory access in the list - void AddAccess(u32 paddr, u32 size) { - // Create new range or extend existing one - ranges[paddr] = std::max(ranges[paddr], size); - - // Simplify ranges... - SimplifyRanges(); - } - - /// Map of accessed ranges (mapping start address to range size) - std::map<u32, u32> ranges; - } memory_accesses; + DebugUtils::MemoryAccessTracker memory_accesses; // Simple circular-replacement vertex cache // The size has been tuned for optimal balance between hit-rate and the cost of lookup @@ -328,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { if (!vertex_cache_hit) { // Initialize data for the current vertex Shader::InputVertex input; - - for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { - if (vertex_attribute_elements[i] != 0) { - // Default attribute values set if array elements have < 4 components. This - // is *not* carried over from the default attribute settings even if they're - // enabled for this attribute. - static const float24 zero = float24::FromFloat32(0.0f); - static const float24 one = float24::FromFloat32(1.0f); - input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); - - // Load per-vertex data from the loader arrays - for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; - const u8* srcdata = Memory::GetPhysicalPointer(source_addr); - - if (g_debug_context && Pica::g_debug_context->recorder) { - memory_accesses.AddAccess(source_addr, - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 - : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); - } - - const float srcval = - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : - (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : - *reinterpret_cast<const float*>(srcdata); - - input.attr[i][comp] = float24::FromFloat32(srcval); - LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", - comp, i, vertex, index, - attribute_config.GetPhysicalBaseAddress(), - vertex_attribute_sources[i] - base_address, - vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], - input.attr[i][comp].ToFloat32()); - } - } else if (attribute_config.IsDefaultAttribute(i)) { - // Load the default attribute if we're configured to do so - input.attr[i] = g_state.vs.default_attributes[i]; - LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", - i, vertex, index, - input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), - input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); - } else { - // TODO(yuriks): In this case, no data gets loaded and the vertex - // remains with the last value it had. This isn't currently maintained - // as global state, however, and so won't work in Citra yet. - } - } + loader.LoadVertex(base_address, index, vertex, input, memory_accesses); if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); // Send to vertex shader - output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes()); + output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); if (is_indexed) { vertex_cache[vertex_cache_pos] = output; diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 56f9bd958..dd0828cee 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -216,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); +/** + * Used in the vertex loader to merge access records. TODO: Investigate if actually useful. + */ +class MemoryAccessTracker { + /// Combine overlapping and close ranges + void SimplifyRanges() { + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too + auto it2 = std::next(it); + while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { + it->second = std::max(it->second, it2->first + it2->second - it->first); + it2 = ranges.erase(it2); + } + } + } + +public: + /// Record a particular memory access in the list + void AddAccess(u32 paddr, u32 size) { + // Create new range or extend existing one + ranges[paddr] = std::max(ranges[paddr], size); + + // Simplify ranges... + SimplifyRanges(); + } + + /// Map of accessed ranges (mapping start address to range size) + std::map<u32, u32> ranges; +}; + } // namespace } // namespace diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 9c5bd97bd..9ce9344d2 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -25,7 +25,7 @@ namespace Pica { namespace Shader { struct InputVertex { - Math::Vec4<float24> attr[16]; + alignas(16) Math::Vec4<float24> attr[16]; }; struct OutputVertex { diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp new file mode 100644 index 000000000..8a3d91896 --- /dev/null +++ b/src/video_core/vertex_loader.cpp @@ -0,0 +1,140 @@ +#include <cmath> +#include <string> + +#include "boost/range/algorithm/fill.hpp" + +#include "common/assert.h" +#include "common/alignment.h" +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/logging/log.h" + +#include "core/memory.h" + +#include "video_core/debug_utils/debug_utils.h" +#include "video_core/pica.h" +#include "video_core/pica_state.h" +#include "video_core/pica_types.h" +#include "video_core/vertex_loader.h" + +namespace Pica { + +void VertexLoader::Setup(const Pica::Regs& regs) { + const auto& attribute_config = regs.vertex_attributes; + num_total_attributes = attribute_config.GetNumTotalAttributes(); + + boost::fill(vertex_attribute_sources, 0xdeadbeef); + + for (int i = 0; i < 16; i++) { + vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i); + } + + // Setup attribute data from loaders + for (int loader = 0; loader < 12; ++loader) { + const auto& loader_config = attribute_config.attribute_loaders[loader]; + + u32 offset = 0; + + // TODO: What happens if a loader overwrites a previous one's data? + for (unsigned component = 0; component < loader_config.component_count; ++component) { + if (component >= 12) { + LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); + continue; + } + + u32 attribute_index = loader_config.GetComponent(component); + if (attribute_index < 12) { + offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); + vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; + vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); + vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); + vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); + offset += attribute_config.GetStride(attribute_index); + } else if (attribute_index < 16) { + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively + offset = Common::AlignUp(offset, 4); + offset += (attribute_index - 11) * 4; + } else { + UNREACHABLE(); // This is truly unreachable due to the number of bits for each component + } + } + } +} + +void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { + for (int i = 0; i < num_total_attributes; ++i) { + if (vertex_attribute_elements[i] != 0) { + // Load per-vertex data from the loader arrays + u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; + + if (g_debug_context && Pica::g_debug_context->recorder) { + memory_accesses.AddAccess(source_addr, vertex_attribute_elements[i] * ( + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1)); + } + + switch (vertex_attribute_formats[i]) { + case Regs::VertexAttributeFormat::BYTE: + { + const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::UBYTE: + { + const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::SHORT: + { + const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + case Regs::VertexAttributeFormat::FLOAT: + { + const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); + for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { + input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); + } + break; + } + } + + // Default attribute values set if array elements have < 4 components. This + // is *not* carried over from the default attribute settings even if they're + // enabled for this attribute. + for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { + input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + } + + LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", + vertex_attribute_elements[i], i, vertex, index, + base_address, + vertex_attribute_sources[i], + vertex_attribute_strides[i] * vertex, + input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); + } else if (vertex_attribute_is_default[i]) { + // Load the default attribute if we're configured to do so + input.attr[i] = g_state.vs.default_attributes[i]; + LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", + i, vertex, index, + input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), + input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); + } else { + // TODO(yuriks): In this case, no data gets loaded and the vertex + // remains with the last value it had. This isn't currently maintained + // as global state, however, and so won't work in Citra yet. + } + } +} + +} // namespace Pica
\ No newline at end of file diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h new file mode 100644 index 000000000..ff42d1596 --- /dev/null +++ b/src/video_core/vertex_loader.h @@ -0,0 +1,28 @@ +#pragma once + +#include <iterator> +#include <algorithm> + +#include "video_core/pica.h" +#include "video_core/shader/shader.h" +#include "video_core/debug_utils/debug_utils.h" + +namespace Pica { + +class VertexLoader { +public: + void Setup(const Pica::Regs& regs); + void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses); + + int GetNumTotalAttributes() const { return num_total_attributes; } + +private: + u32 vertex_attribute_sources[16]; + u32 vertex_attribute_strides[16] = {}; + Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; + u32 vertex_attribute_elements[16] = {}; + bool vertex_attribute_is_default[16]; + int num_total_attributes; +}; + +} // namespace Pica |