diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/command_processor.cpp | 60 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 4 | ||||
-rw-r--r-- | src/video_core/pica.h | 43 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 142 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 6 |
5 files changed, 241 insertions, 14 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 110caec76..2a1c885a7 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -123,12 +123,55 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value()); PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value()); + if (g_debug_context) { + for (int i = 0; i < 3; ++i) { + const auto texture = regs.GetTextures()[i]; + if (!texture.enabled) + continue; + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + if (g_debug_context && Pica::g_debug_context->recorder) + g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress()); + } + } + + class { + /// Combine overlapping and close ranges + void SimplifyRanges() { + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too + auto it2 = std::next(it); + while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { + it->second = std::max(it->second, it2->first + it2->second - it->first); + it2 = ranges.erase(it2); + } + } + } + + public: + /// Record a particular memory access in the list + void AddAccess(u32 paddr, u32 size) { + // Create new range or extend existing one + ranges[paddr] = std::max(ranges[paddr], size); + + // Simplify ranges... + SimplifyRanges(); + } + + /// Map of accessed ranges (mapping start address to range size) + std::map<u32, u32> ranges; + } memory_accesses; + for (unsigned int index = 0; index < regs.num_vertices; ++index) { unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; if (is_indexed) { // TODO: Implement some sort of vertex cache! + if (g_debug_context && Pica::g_debug_context->recorder) { + int size = index_u16 ? 2 : 1; + memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); + } } // Initialize data for the current vertex @@ -151,7 +194,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Load per-vertex data from the loader arrays for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]); + u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; + const u8* srcdata = Memory::GetPhysicalPointer(source_addr); + + if (g_debug_context && Pica::g_debug_context->recorder) { + memory_accesses.AddAccess(source_addr, + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); + } const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : @@ -213,14 +263,20 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } } + for (auto& range : memory_accesses.ranges) { + g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), + range.second, range.first); + } + if (Settings::values.use_hw_renderer) { VideoCore::g_renderer->hw_rasterizer->DrawTriangles(); } geometry_dumper.Dump(); - if (g_debug_context) + if (g_debug_context) { g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } break; } diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 7926d64ec..2573292e2 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -14,6 +14,8 @@ #include "common/vector_math.h" +#include "core/tracer/recorder.h" + #include "video_core/pica.h" namespace Pica { @@ -129,6 +131,8 @@ public: Event active_breakpoint; bool at_breakpoint = false; + std::shared_ptr<CiTrace::Recorder> recorder = nullptr; + private: /** * Private default constructor to make sure people always construct this through Construct() diff --git a/src/video_core/pica.h b/src/video_core/pica.h index feb20214a..46a7b21dc 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -420,6 +420,11 @@ struct Regs { GreaterThanOrEqual = 7, }; + enum class StencilAction : u32 { + Keep = 0, + Xor = 5, + }; + struct { union { // If false, logic blending is used @@ -454,15 +459,35 @@ struct Regs { BitField< 8, 8, u32> ref; } alpha_test; - union { - BitField< 0, 1, u32> stencil_test_enable; - BitField< 4, 3, CompareFunc> stencil_test_func; - BitField< 8, 8, u32> stencil_replacement_value; - BitField<16, 8, u32> stencil_reference_value; - BitField<24, 8, u32> stencil_mask; - } stencil_test; + struct { + union { + // If true, enable stencil testing + BitField< 0, 1, u32> enable; - INSERT_PADDING_WORDS(0x1); + // Comparison operation for stencil testing + BitField< 4, 3, CompareFunc> func; + + // Value to calculate the new stencil value from + BitField< 8, 8, u32> replacement_value; + + // Value to compare against for stencil testing + BitField<16, 8, u32> reference_value; + + // Mask to apply on stencil test inputs + BitField<24, 8, u32> mask; + }; + + union { + // Action to perform when the stencil test fails + BitField< 0, 3, StencilAction> action_stencil_fail; + + // Action to perform when stencil testing passed but depth testing fails + BitField< 4, 3, StencilAction> action_depth_fail; + + // Action to perform when both stencil and depth testing pass + BitField< 8, 3, StencilAction> action_depth_pass; + }; + } stencil_test; union { BitField< 0, 1, u32> depth_test_enable; @@ -512,7 +537,7 @@ struct Regs { struct { INSERT_PADDING_WORDS(0x6); - DepthFormat depth_format; + DepthFormat depth_format; // TODO: Should be a BitField! BitField<16, 3, ColorFormat> color_format; INSERT_PADDING_WORDS(0x4); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 70b115744..c381c2bd9 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) { } } +static u8 GetStencil(int x, int y) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* src_pixel = depth_buffer + src_offset; + + switch (framebuffer.depth_format) { + case Regs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).y; + + default: + LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); + return 0; + } +} + static void SetDepth(int x, int y, u32 value) { const auto& framebuffer = g_state.regs.framebuffer; const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); @@ -144,13 +168,46 @@ static void SetDepth(int x, int y, u32 value) { case Regs::DepthFormat::D16: Color::EncodeD16(value, dst_pixel); break; + case Regs::DepthFormat::D24: Color::EncodeD24(value, dst_pixel); break; + case Regs::DepthFormat::D24S8: - // TODO(Subv): Implement the stencil buffer - Color::EncodeD24S8(value, 0, dst_pixel); + Color::EncodeD24X8(value, dst_pixel); break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; + } +} + +static void SetStencil(int x, int y, u8 value) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* dst_pixel = depth_buffer + dst_offset; + + switch (framebuffer.depth_format) { + case Pica::Regs::DepthFormat::D16: + case Pica::Regs::DepthFormat::D24: + // Nothing to do + break; + + case Pica::Regs::DepthFormat::D24S8: + Color::EncodeX24S8(value, dst_pixel); + break; + default: LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); UNIMPLEMENTED(); @@ -158,6 +215,22 @@ static void SetDepth(int x, int y, u32 value) { } } +// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! +static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { + switch (action) { + case Regs::StencilAction::Keep: + return dest; + + case Regs::StencilAction::Xor: + return dest ^ ref; + + default: + LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); + UNIMPLEMENTED(); + return 0; + } +} + // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values struct Fix12P4 { Fix12P4() {} @@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto textures = regs.GetTextures(); auto tev_stages = regs.GetTevStages(); + bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; + const auto stencil_test = g_state.regs.output_merger.stencil_test; + // Enter rasterization loop, starting at the center of the topleft bounding box corner. // TODO: Not sure if looping through x first might be faster for (u16 y = min_y + 8; y < max_y; y += 0x10) { @@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } const auto& output_merger = regs.output_merger; + // TODO: Does alpha testing happen before or after stencil? if (output_merger.alpha_test.enable) { bool pass = false; @@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, continue; } + u8 old_stencil = 0; + if (stencil_action_enable) { + old_stencil = GetStencil(x >> 4, y >> 4); + u8 dest = old_stencil & stencil_test.mask; + u8 ref = stencil_test.reference_value & stencil_test.mask; + + bool pass = false; + switch (stencil_test.func) { + case Regs::CompareFunc::Never: + pass = false; + break; + + case Regs::CompareFunc::Always: + pass = true; + break; + + case Regs::CompareFunc::Equal: + pass = (ref == dest); + break; + + case Regs::CompareFunc::NotEqual: + pass = (ref != dest); + break; + + case Regs::CompareFunc::LessThan: + pass = (ref < dest); + break; + + case Regs::CompareFunc::LessThanOrEqual: + pass = (ref <= dest); + break; + + case Regs::CompareFunc::GreaterThan: + pass = (ref > dest); + break; + + case Regs::CompareFunc::GreaterThanOrEqual: + pass = (ref >= dest); + break; + } + + if (!pass) { + u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + continue; + } + } + // TODO: Does depth indeed only get written even if depth testing is enabled? if (output_merger.depth_test_enable) { unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); @@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, break; } - if (!pass) + if (!pass) { + if (stencil_action_enable) { + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } continue; + } if (output_merger.depth_write_enable) SetDepth(x >> 4, y >> 4, z); + + if (stencil_action_enable) { + // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } } auto dest = GetPixel(x >> 4, y >> 4); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 9799f74fa..96e12839a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -22,6 +22,8 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shaders.h" +#include "video_core/debug_utils/debug_utils.h" + /** * Vertex structure that the drawn screen rectangles are composed of. */ @@ -129,6 +131,10 @@ void RendererOpenGL::SwapBuffers() { hw_rasterizer->Reset(); } } + + if (Pica::g_debug_context && Pica::g_debug_context->recorder) { + Pica::g_debug_context->recorder->FrameFinished(); + } } /** |