summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_processor.cpp60
-rw-r--r--src/video_core/debug_utils/debug_utils.h4
-rw-r--r--src/video_core/pica.h43
-rw-r--r--src/video_core/rasterizer.cpp142
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
5 files changed, 241 insertions, 14 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 110caec76..2a1c885a7 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -123,12 +123,55 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value());
PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value());
+ if (g_debug_context) {
+ for (int i = 0; i < 3; ++i) {
+ const auto texture = regs.GetTextures()[i];
+ if (!texture.enabled)
+ continue;
+
+ u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
+ if (g_debug_context && Pica::g_debug_context->recorder)
+ g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress());
+ }
+ }
+
+ class {
+ /// Combine overlapping and close ranges
+ void SimplifyRanges() {
+ for (auto it = ranges.begin(); it != ranges.end(); ++it) {
+ // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
+ auto it2 = std::next(it);
+ while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
+ it->second = std::max(it->second, it2->first + it2->second - it->first);
+ it2 = ranges.erase(it2);
+ }
+ }
+ }
+
+ public:
+ /// Record a particular memory access in the list
+ void AddAccess(u32 paddr, u32 size) {
+ // Create new range or extend existing one
+ ranges[paddr] = std::max(ranges[paddr], size);
+
+ // Simplify ranges...
+ SimplifyRanges();
+ }
+
+ /// Map of accessed ranges (mapping start address to range size)
+ std::map<u32, u32> ranges;
+ } memory_accesses;
+
for (unsigned int index = 0; index < regs.num_vertices; ++index)
{
unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
if (is_indexed) {
// TODO: Implement some sort of vertex cache!
+ if (g_debug_context && Pica::g_debug_context->recorder) {
+ int size = index_u16 ? 2 : 1;
+ memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
+ }
}
// Initialize data for the current vertex
@@ -151,7 +194,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
// Load per-vertex data from the loader arrays
for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
- const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]);
+ u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
+ const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
+
+ if (g_debug_context && Pica::g_debug_context->recorder) {
+ memory_accesses.AddAccess(source_addr,
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
+ : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
+ }
const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
@@ -213,14 +263,20 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
}
}
+ for (auto& range : memory_accesses.ranges) {
+ g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
+ range.second, range.first);
+ }
+
if (Settings::values.use_hw_renderer) {
VideoCore::g_renderer->hw_rasterizer->DrawTriangles();
}
geometry_dumper.Dump();
- if (g_debug_context)
+ if (g_debug_context) {
g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+ }
break;
}
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 7926d64ec..2573292e2 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -14,6 +14,8 @@
#include "common/vector_math.h"
+#include "core/tracer/recorder.h"
+
#include "video_core/pica.h"
namespace Pica {
@@ -129,6 +131,8 @@ public:
Event active_breakpoint;
bool at_breakpoint = false;
+ std::shared_ptr<CiTrace::Recorder> recorder = nullptr;
+
private:
/**
* Private default constructor to make sure people always construct this through Construct()
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index feb20214a..46a7b21dc 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -420,6 +420,11 @@ struct Regs {
GreaterThanOrEqual = 7,
};
+ enum class StencilAction : u32 {
+ Keep = 0,
+ Xor = 5,
+ };
+
struct {
union {
// If false, logic blending is used
@@ -454,15 +459,35 @@ struct Regs {
BitField< 8, 8, u32> ref;
} alpha_test;
- union {
- BitField< 0, 1, u32> stencil_test_enable;
- BitField< 4, 3, CompareFunc> stencil_test_func;
- BitField< 8, 8, u32> stencil_replacement_value;
- BitField<16, 8, u32> stencil_reference_value;
- BitField<24, 8, u32> stencil_mask;
- } stencil_test;
+ struct {
+ union {
+ // If true, enable stencil testing
+ BitField< 0, 1, u32> enable;
- INSERT_PADDING_WORDS(0x1);
+ // Comparison operation for stencil testing
+ BitField< 4, 3, CompareFunc> func;
+
+ // Value to calculate the new stencil value from
+ BitField< 8, 8, u32> replacement_value;
+
+ // Value to compare against for stencil testing
+ BitField<16, 8, u32> reference_value;
+
+ // Mask to apply on stencil test inputs
+ BitField<24, 8, u32> mask;
+ };
+
+ union {
+ // Action to perform when the stencil test fails
+ BitField< 0, 3, StencilAction> action_stencil_fail;
+
+ // Action to perform when stencil testing passed but depth testing fails
+ BitField< 4, 3, StencilAction> action_depth_fail;
+
+ // Action to perform when both stencil and depth testing pass
+ BitField< 8, 3, StencilAction> action_depth_pass;
+ };
+ } stencil_test;
union {
BitField< 0, 1, u32> depth_test_enable;
@@ -512,7 +537,7 @@ struct Regs {
struct {
INSERT_PADDING_WORDS(0x6);
- DepthFormat depth_format;
+ DepthFormat depth_format; // TODO: Should be a BitField!
BitField<16, 3, ColorFormat> color_format;
INSERT_PADDING_WORDS(0x4);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 70b115744..c381c2bd9 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) {
}
}
+static u8 GetStencil(int x, int y) {
+ const auto& framebuffer = g_state.regs.framebuffer;
+ const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+ u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+ y = framebuffer.height - y;
+
+ const u32 coarse_y = y & ~7;
+ u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+ u32 stride = framebuffer.width * bytes_per_pixel;
+
+ u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+ u8* src_pixel = depth_buffer + src_offset;
+
+ switch (framebuffer.depth_format) {
+ case Regs::DepthFormat::D24S8:
+ return Color::DecodeD24S8(src_pixel).y;
+
+ default:
+ LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format);
+ return 0;
+ }
+}
+
static void SetDepth(int x, int y, u32 value) {
const auto& framebuffer = g_state.regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
@@ -144,13 +168,46 @@ static void SetDepth(int x, int y, u32 value) {
case Regs::DepthFormat::D16:
Color::EncodeD16(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24:
Color::EncodeD24(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24S8:
- // TODO(Subv): Implement the stencil buffer
- Color::EncodeD24S8(value, 0, dst_pixel);
+ Color::EncodeD24X8(value, dst_pixel);
break;
+
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
+ UNIMPLEMENTED();
+ break;
+ }
+}
+
+static void SetStencil(int x, int y, u8 value) {
+ const auto& framebuffer = g_state.regs.framebuffer;
+ const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+ u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+ y = framebuffer.height - y;
+
+ const u32 coarse_y = y & ~7;
+ u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+ u32 stride = framebuffer.width * bytes_per_pixel;
+
+ u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+ u8* dst_pixel = depth_buffer + dst_offset;
+
+ switch (framebuffer.depth_format) {
+ case Pica::Regs::DepthFormat::D16:
+ case Pica::Regs::DepthFormat::D24:
+ // Nothing to do
+ break;
+
+ case Pica::Regs::DepthFormat::D24S8:
+ Color::EncodeX24S8(value, dst_pixel);
+ break;
+
default:
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
UNIMPLEMENTED();
@@ -158,6 +215,22 @@ static void SetDepth(int x, int y, u32 value) {
}
}
+// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
+static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) {
+ switch (action) {
+ case Regs::StencilAction::Keep:
+ return dest;
+
+ case Regs::StencilAction::Xor:
+ return dest ^ ref;
+
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
+ UNIMPLEMENTED();
+ return 0;
+ }
+}
+
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
struct Fix12P4 {
Fix12P4() {}
@@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
auto textures = regs.GetTextures();
auto tev_stages = regs.GetTevStages();
+ bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
+ const auto stencil_test = g_state.regs.output_merger.stencil_test;
+
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
// TODO: Not sure if looping through x first might be faster
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
@@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
}
const auto& output_merger = regs.output_merger;
+ // TODO: Does alpha testing happen before or after stencil?
if (output_merger.alpha_test.enable) {
bool pass = false;
@@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
continue;
}
+ u8 old_stencil = 0;
+ if (stencil_action_enable) {
+ old_stencil = GetStencil(x >> 4, y >> 4);
+ u8 dest = old_stencil & stencil_test.mask;
+ u8 ref = stencil_test.reference_value & stencil_test.mask;
+
+ bool pass = false;
+ switch (stencil_test.func) {
+ case Regs::CompareFunc::Never:
+ pass = false;
+ break;
+
+ case Regs::CompareFunc::Always:
+ pass = true;
+ break;
+
+ case Regs::CompareFunc::Equal:
+ pass = (ref == dest);
+ break;
+
+ case Regs::CompareFunc::NotEqual:
+ pass = (ref != dest);
+ break;
+
+ case Regs::CompareFunc::LessThan:
+ pass = (ref < dest);
+ break;
+
+ case Regs::CompareFunc::LessThanOrEqual:
+ pass = (ref <= dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThan:
+ pass = (ref > dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThanOrEqual:
+ pass = (ref >= dest);
+ break;
+ }
+
+ if (!pass) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ continue;
+ }
+ }
+
// TODO: Does depth indeed only get written even if depth testing is enabled?
if (output_merger.depth_test_enable) {
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
@@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
break;
}
- if (!pass)
+ if (!pass) {
+ if (stencil_action_enable) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
continue;
+ }
if (output_merger.depth_write_enable)
SetDepth(x >> 4, y >> 4, z);
+
+ if (stencil_action_enable) {
+ // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
}
auto dest = GetPixel(x >> 4, y >> 4);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 9799f74fa..96e12839a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -22,6 +22,8 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_shaders.h"
+#include "video_core/debug_utils/debug_utils.h"
+
/**
* Vertex structure that the drawn screen rectangles are composed of.
*/
@@ -129,6 +131,10 @@ void RendererOpenGL::SwapBuffers() {
hw_rasterizer->Reset();
}
}
+
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ Pica::g_debug_context->recorder->FrameFinished();
+ }
}
/**