diff options
Diffstat (limited to 'src/video_core')
36 files changed, 2129 insertions, 459 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2a924f4ad..76cfd4f7d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -11,8 +11,10 @@ set(SRCS pica.cpp primitive_assembly.cpp rasterizer.cpp + renderer_base.cpp shader/shader.cpp shader/shader_interpreter.cpp + swrasterizer.cpp utils.cpp video_core.cpp ) @@ -30,13 +32,16 @@ set(HEADERS clipper.h command_processor.h gpu_debugger.h - hwrasterizer_base.h pica.h + pica_state.h + pica_types.h primitive_assembly.h rasterizer.h + rasterizer_interface.h renderer_base.h shader/shader.h shader/shader_interpreter.h + swrasterizer.h utils.h video_core.h ) diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index ed99c4f13..3d503486e 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -6,6 +6,7 @@ #include "video_core/clipper.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/rasterizer.h" #include "video_core/shader/shader_interpreter.h" @@ -59,15 +60,17 @@ static void InitScreenCoordinates(OutputVertex& vtx) } viewport; const auto& regs = g_state.regs; - viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); - viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); + viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); + viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); - viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range); - viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane); + viewport.zscale = float24::FromRaw(regs.viewport_depth_range); + viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; + vtx.view *= inv_w; + vtx.quat *= inv_w; 
vtx.tc0 *= inv_w; vtx.tc1 *= inv_w; vtx.tc2 *= inv_w; @@ -78,7 +81,7 @@ static void InitScreenCoordinates(OutputVertex& vtx) vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; } -void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { +void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { using boost::container::static_vector; // Clipping a planar n-gon against a plane will remove at least 1 vertex and introduces 2 at diff --git a/src/video_core/clipper.h b/src/video_core/clipper.h index 6ed01e877..f85d8d4c9 100644 --- a/src/video_core/clipper.h +++ b/src/video_core/clipper.h @@ -14,7 +14,7 @@ namespace Clipper { using Shader::OutputVertex; -void ProcessTriangle(OutputVertex& v0, OutputVertex& v1, OutputVertex& v2); +void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2); } // namespace diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index bd1b09a4b..2274dfa66 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -15,6 +15,7 @@ #include "video_core/clipper.h" #include "video_core/command_processor.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/primitive_assembly.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" @@ -73,6 +74,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); break; + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.index, 0x232): + if (regs.vs_default_attributes_setup.index == 15) { + // Reset immediate primitive state + g_state.immediate.primitive_assembler.Reconfigure(regs.triangle_topology); + g_state.immediate.attribute_id = 0; + } + break; + // Load default vertex input attributes case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): case 
PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): @@ -98,21 +107,58 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); + attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), attribute.w.ToFloat32()); // TODO: Verify that this actually modifies the register! - setup.index = setup.index + 1; + if (setup.index < 15) { + setup.index++; + } else { + // Put each attribute into an immediate input buffer. + // When all specified immediate attributes are present, the Vertex Shader is invoked and everything is + // sent to the primitive assembler. 
+ + auto& immediate_input = g_state.immediate.input; + auto& immediate_attribute_id = g_state.immediate.attribute_id; + const auto& attribute_config = regs.vertex_attributes; + + immediate_input.attr[immediate_attribute_id++] = attribute; + + if (immediate_attribute_id >= attribute_config.GetNumTotalAttributes()) { + immediate_attribute_id = 0; + + Shader::UnitState<false> shader_unit; + Shader::Setup(shader_unit); + + // Send to vertex shader + Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, attribute_config.GetNumTotalAttributes()); + + // Send to renderer + using Pica::Shader::OutputVertex; + auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { + VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); + }; + + g_state.immediate.primitive_assembler.SubmitVertex(output, AddTriangle); + } + } } break; } + case PICA_REG_INDEX(gpu_mode): + if (regs.gpu_mode == Regs::GPUMode::Configuring && regs.vs_default_attributes_setup.index == 15) { + // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); + } + break; + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): { @@ -157,15 +203,25 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // TODO: What happens if a loader overwrites a previous one's data? 
for (unsigned component = 0; component < loader_config.component_count; ++component) { - if (component >= 12) + if (component >= 12) { LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); + continue; + } + u32 attribute_index = loader_config.GetComponent(component); - vertex_attribute_sources[attribute_index] = load_address; - vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); - vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); - vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); - vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); - load_address += attribute_config.GetStride(attribute_index); + if (attribute_index < 12) { + vertex_attribute_sources[attribute_index] = load_address; + vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); + vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); + vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); + vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); + load_address += attribute_config.GetStride(attribute_index); + } else if (attribute_index < 16) { + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively + load_address += (attribute_index - 11) * 4; + } else { + UNREACHABLE(); // This is truly unreachable due to the number of bits for each component + } } } @@ -336,19 +392,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { } } - if (Settings::values.use_hw_renderer) { - // Send to hardware renderer - static auto AddHWTriangle = [](const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) { - 
VideoCore::g_renderer->hw_rasterizer->AddTriangle(v0, v1, v2); - }; + // Send to renderer + using Pica::Shader::OutputVertex; + auto AddTriangle = []( + const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) { + VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); + }; - primitive_assembler.SubmitVertex(output, AddHWTriangle); - } else { - // Send to triangle clipper - primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); - } + primitive_assembler.SubmitVertex(output, AddTriangle); } for (auto& range : memory_accesses.ranges) { @@ -356,9 +407,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { range.second, range.first); } - if (Settings::values.use_hw_renderer) { - VideoCore::g_renderer->hw_rasterizer->DrawTriangles(); - } + VideoCore::g_renderer->Rasterizer()->DrawTriangles(); #if PICA_DUMP_GEOMETRY geometry_dumper.Dump(); @@ -425,10 +474,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); } else { // TODO: Untested - uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); - uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); - uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); - uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); + uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); + uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); + uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); + uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); } LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, @@ -436,7 +485,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { 
uniform.w.ToFloat32()); // TODO: Verify that this actually modifies the register! - uniform_setup.index = uniform_setup.index + 1; + uniform_setup.index.Assign(uniform_setup.index + 1); } break; } @@ -471,11 +520,29 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { break; } + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): + { + auto& lut_config = regs.lighting.lut_config; + + ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); + + g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; + lut_config.index.Assign(lut_config.index + 1); + break; + } + default: break; } - VideoCore::g_renderer->hw_rasterizer->NotifyPicaRegisterChanged(id); + VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id); if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id)); @@ -493,9 +560,8 @@ void ProcessCommandList(const u32* list, u32 size) { u32 value = *g_state.cmd_list.current_ptr++; const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; - u32 cmd = header.cmd_id; - WritePicaReg(cmd, value, header.parameter_mask); + WritePicaReg(header.cmd_id, value, header.parameter_mask); for (unsigned i = 0; i < header.extra_data_length; ++i) { u32 cmd = header.cmd_id + (header.group_commands ? 
i + 1 : 0); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index f1cfa9361..271e81ca1 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -28,6 +28,7 @@ #include "core/settings.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/renderer_base.h" #include "video_core/utils.h" #include "video_core/video_core.h" @@ -46,10 +47,8 @@ void DebugContext::OnEvent(Event event, void* data) { { std::unique_lock<std::mutex> lock(breakpoint_mutex); - if (Settings::values.use_hw_renderer) { - // Commit the hardware renderer's framebuffer so it will show on debug widgets - VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer(); - } + // Commit the hardware renderer's framebuffer so it will show on debug widgets + VideoCore::g_renderer->Rasterizer()->FlushFramebuffer(); // TODO: Should stop the CPU thread here once we multithread emulation. @@ -115,7 +114,7 @@ void GeometryDumper::Dump() { } -void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) +void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) { struct StuffToWrite { u8* pointer; @@ -203,11 +202,11 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c if (it == output_info_table.end()) { output_info_table.emplace_back(); - output_info_table.back().type = type; - output_info_table.back().component_mask = component_mask; - output_info_table.back().id = i; + output_info_table.back().type.Assign(type); + output_info_table.back().component_mask.Assign(component_mask); + output_info_table.back().id.Assign(i); } else { - it->component_mask = it->component_mask | component_mask; + it->component_mask.Assign(it->component_mask | 
component_mask); } } catch (const std::out_of_range& ) { DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 85762f5b4..795160a32 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -17,6 +17,7 @@ #include "core/tracer/recorder.h" #include "video_core/pica.h" +#include "video_core/shader/shader.h" namespace Pica { @@ -182,7 +183,7 @@ private: }; void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, - const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); + const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); // Utility class to log Pica commands. diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index 8c528989e..32ad72674 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -6,72 +6,481 @@ #include <unordered_map> #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/shader/shader.h" namespace Pica { State g_state; +static const std::pair<u16, const char*> register_names[] = { + { 0x010, "GPUREG_FINALIZE" }, + + { 0x040, "GPUREG_FACECULLING_CONFIG" }, + { 0x041, "GPUREG_VIEWPORT_WIDTH" }, + { 0x042, "GPUREG_VIEWPORT_INVW" }, + { 0x043, "GPUREG_VIEWPORT_HEIGHT" }, + { 0x044, "GPUREG_VIEWPORT_INVH" }, + + { 0x047, "GPUREG_FRAGOP_CLIP" }, + { 0x048, "GPUREG_FRAGOP_CLIP_DATA0" }, + { 0x049, "GPUREG_FRAGOP_CLIP_DATA1" }, + { 0x04A, "GPUREG_FRAGOP_CLIP_DATA2" }, + { 0x04B, "GPUREG_FRAGOP_CLIP_DATA3" }, + + { 0x04D, "GPUREG_DEPTHMAP_SCALE" }, + { 0x04E, "GPUREG_DEPTHMAP_OFFSET" }, + { 0x04F, "GPUREG_SH_OUTMAP_TOTAL" }, + { 0x050, "GPUREG_SH_OUTMAP_O0" }, + { 0x051, "GPUREG_SH_OUTMAP_O1" }, + { 0x052, "GPUREG_SH_OUTMAP_O2" }, + { 0x053, "GPUREG_SH_OUTMAP_O3" }, + { 0x054, "GPUREG_SH_OUTMAP_O4" }, + { 0x055, "GPUREG_SH_OUTMAP_O5" }, + { 0x056, "GPUREG_SH_OUTMAP_O6" 
}, + + { 0x061, "GPUREG_EARLYDEPTH_FUNC" }, + { 0x062, "GPUREG_EARLYDEPTH_TEST1" }, + { 0x063, "GPUREG_EARLYDEPTH_CLEAR" }, + { 0x064, "GPUREG_SH_OUTATTR_MODE" }, + { 0x065, "GPUREG_SCISSORTEST_MODE" }, + { 0x066, "GPUREG_SCISSORTEST_POS" }, + { 0x067, "GPUREG_SCISSORTEST_DIM" }, + { 0x068, "GPUREG_VIEWPORT_XY" }, + + { 0x06A, "GPUREG_EARLYDEPTH_DATA" }, + + { 0x06D, "GPUREG_DEPTHMAP_ENABLE" }, + { 0x06E, "GPUREG_RENDERBUF_DIM" }, + { 0x06F, "GPUREG_SH_OUTATTR_CLOCK" }, + + { 0x080, "GPUREG_TEXUNIT_CONFIG" }, + { 0x081, "GPUREG_TEXUNIT0_BORDER_COLOR" }, + { 0x082, "GPUREG_TEXUNIT0_DIM" }, + { 0x083, "GPUREG_TEXUNIT0_PARAM" }, + { 0x084, "GPUREG_TEXUNIT0_LOD" }, + { 0x085, "GPUREG_TEXUNIT0_ADDR1" }, + { 0x086, "GPUREG_TEXUNIT0_ADDR2" }, + { 0x087, "GPUREG_TEXUNIT0_ADDR3" }, + { 0x088, "GPUREG_TEXUNIT0_ADDR4" }, + { 0x089, "GPUREG_TEXUNIT0_ADDR5" }, + { 0x08A, "GPUREG_TEXUNIT0_ADDR6" }, + { 0x08B, "GPUREG_TEXUNIT0_SHADOW" }, + + { 0x08E, "GPUREG_TEXUNIT0_TYPE" }, + { 0x08F, "GPUREG_LIGHTING_ENABLE0" }, + + { 0x091, "GPUREG_TEXUNIT1_BORDER_COLOR" }, + { 0x092, "GPUREG_TEXUNIT1_DIM" }, + { 0x093, "GPUREG_TEXUNIT1_PARAM" }, + { 0x094, "GPUREG_TEXUNIT1_LOD" }, + { 0x095, "GPUREG_TEXUNIT1_ADDR" }, + { 0x096, "GPUREG_TEXUNIT1_TYPE" }, + + { 0x099, "GPUREG_TEXUNIT2_BORDER_COLOR" }, + { 0x09A, "GPUREG_TEXUNIT2_DIM" }, + { 0x09B, "GPUREG_TEXUNIT2_PARAM" }, + { 0x09C, "GPUREG_TEXUNIT2_LOD" }, + { 0x09D, "GPUREG_TEXUNIT2_ADDR" }, + { 0x09E, "GPUREG_TEXUNIT2_TYPE" }, + + { 0x0A8, "GPUREG_TEXUNIT3_PROCTEX0" }, + { 0x0A9, "GPUREG_TEXUNIT3_PROCTEX1" }, + { 0x0AA, "GPUREG_TEXUNIT3_PROCTEX2" }, + { 0x0AB, "GPUREG_TEXUNIT3_PROCTEX3" }, + { 0x0AC, "GPUREG_TEXUNIT3_PROCTEX4" }, + { 0x0AD, "GPUREG_TEXUNIT3_PROCTEX5" }, + + { 0x0AF, "GPUREG_PROCTEX_LUT" }, + { 0x0B0, "GPUREG_PROCTEX_LUT_DATA0" }, + { 0x0B1, "GPUREG_PROCTEX_LUT_DATA1" }, + { 0x0B2, "GPUREG_PROCTEX_LUT_DATA2" }, + { 0x0B3, "GPUREG_PROCTEX_LUT_DATA3" }, + { 0x0B4, "GPUREG_PROCTEX_LUT_DATA4" }, + { 0x0B5, 
"GPUREG_PROCTEX_LUT_DATA5" }, + { 0x0B6, "GPUREG_PROCTEX_LUT_DATA6" }, + { 0x0B7, "GPUREG_PROCTEX_LUT_DATA7" }, + + { 0x0C0, "GPUREG_TEXENV0_SOURCE" }, + { 0x0C1, "GPUREG_TEXENV0_OPERAND" }, + { 0x0C2, "GPUREG_TEXENV0_COMBINER" }, + { 0x0C3, "GPUREG_TEXENV0_COLOR" }, + { 0x0C4, "GPUREG_TEXENV0_SCALE" }, + + { 0x0C8, "GPUREG_TEXENV1_SOURCE" }, + { 0x0C9, "GPUREG_TEXENV1_OPERAND" }, + { 0x0CA, "GPUREG_TEXENV1_COMBINER" }, + { 0x0CB, "GPUREG_TEXENV1_COLOR" }, + { 0x0CC, "GPUREG_TEXENV1_SCALE" }, + + { 0x0D0, "GPUREG_TEXENV2_SOURCE" }, + { 0x0D1, "GPUREG_TEXENV2_OPERAND" }, + { 0x0D2, "GPUREG_TEXENV2_COMBINER" }, + { 0x0D3, "GPUREG_TEXENV2_COLOR" }, + { 0x0D4, "GPUREG_TEXENV2_SCALE" }, + + { 0x0D8, "GPUREG_TEXENV3_SOURCE" }, + { 0x0D9, "GPUREG_TEXENV3_OPERAND" }, + { 0x0DA, "GPUREG_TEXENV3_COMBINER" }, + { 0x0DB, "GPUREG_TEXENV3_COLOR" }, + { 0x0DC, "GPUREG_TEXENV3_SCALE" }, + + { 0x0E0, "GPUREG_TEXENV_UPDATE_BUFFER" }, + { 0x0E1, "GPUREG_FOG_COLOR" }, + + { 0x0E4, "GPUREG_GAS_ATTENUATION" }, + { 0x0E5, "GPUREG_GAS_ACCMAX" }, + { 0x0E6, "GPUREG_FOG_LUT_INDEX" }, + + { 0x0E8, "GPUREG_FOG_LUT_DATA0" }, + { 0x0E9, "GPUREG_FOG_LUT_DATA1" }, + { 0x0EA, "GPUREG_FOG_LUT_DATA2" }, + { 0x0EB, "GPUREG_FOG_LUT_DATA3" }, + { 0x0EC, "GPUREG_FOG_LUT_DATA4" }, + { 0x0ED, "GPUREG_FOG_LUT_DATA5" }, + { 0x0EE, "GPUREG_FOG_LUT_DATA6" }, + { 0x0EF, "GPUREG_FOG_LUT_DATA7" }, + { 0x0F0, "GPUREG_TEXENV4_SOURCE" }, + { 0x0F1, "GPUREG_TEXENV4_OPERAND" }, + { 0x0F2, "GPUREG_TEXENV4_COMBINER" }, + { 0x0F3, "GPUREG_TEXENV4_COLOR" }, + { 0x0F4, "GPUREG_TEXENV4_SCALE" }, + + { 0x0F8, "GPUREG_TEXENV5_SOURCE" }, + { 0x0F9, "GPUREG_TEXENV5_OPERAND" }, + { 0x0FA, "GPUREG_TEXENV5_COMBINER" }, + { 0x0FB, "GPUREG_TEXENV5_COLOR" }, + { 0x0FC, "GPUREG_TEXENV5_SCALE" }, + { 0x0FD, "GPUREG_TEXENV_BUFFER_COLOR" }, + + { 0x100, "GPUREG_COLOR_OPERATION" }, + { 0x101, "GPUREG_BLEND_FUNC" }, + { 0x102, "GPUREG_LOGIC_OP" }, + { 0x103, "GPUREG_BLEND_COLOR" }, + { 0x104, "GPUREG_FRAGOP_ALPHA_TEST" }, + { 0x105, 
"GPUREG_STENCIL_TEST" }, + { 0x106, "GPUREG_STENCIL_OP" }, + { 0x107, "GPUREG_DEPTH_COLOR_MASK" }, + + { 0x110, "GPUREG_FRAMEBUFFER_INVALIDATE" }, + { 0x111, "GPUREG_FRAMEBUFFER_FLUSH" }, + { 0x112, "GPUREG_COLORBUFFER_READ" }, + { 0x113, "GPUREG_COLORBUFFER_WRITE" }, + { 0x114, "GPUREG_DEPTHBUFFER_READ" }, + { 0x115, "GPUREG_DEPTHBUFFER_WRITE" }, + { 0x116, "GPUREG_DEPTHBUFFER_FORMAT" }, + { 0x117, "GPUREG_COLORBUFFER_FORMAT" }, + { 0x118, "GPUREG_EARLYDEPTH_TEST2" }, + + { 0x11B, "GPUREG_FRAMEBUFFER_BLOCK32" }, + { 0x11C, "GPUREG_DEPTHBUFFER_LOC" }, + { 0x11D, "GPUREG_COLORBUFFER_LOC" }, + { 0x11E, "GPUREG_FRAMEBUFFER_DIM" }, + + { 0x120, "GPUREG_GAS_LIGHT_XY" }, + { 0x121, "GPUREG_GAS_LIGHT_Z" }, + { 0x122, "GPUREG_GAS_LIGHT_Z_COLOR" }, + { 0x123, "GPUREG_GAS_LUT_INDEX" }, + { 0x124, "GPUREG_GAS_LUT_DATA" }, + + { 0x126, "GPUREG_GAS_DELTAZ_DEPTH" }, + + { 0x130, "GPUREG_FRAGOP_SHADOW" }, + + { 0x140, "GPUREG_LIGHT0_SPECULAR0" }, + { 0x141, "GPUREG_LIGHT0_SPECULAR1" }, + { 0x142, "GPUREG_LIGHT0_DIFFUSE" }, + { 0x143, "GPUREG_LIGHT0_AMBIENT" }, + { 0x144, "GPUREG_LIGHT0_XY" }, + { 0x145, "GPUREG_LIGHT0_Z" }, + { 0x146, "GPUREG_LIGHT0_SPOTDIR_XY" }, + { 0x147, "GPUREG_LIGHT0_SPOTDIR_Z" }, + + { 0x149, "GPUREG_LIGHT0_CONFIG" }, + { 0x14A, "GPUREG_LIGHT0_ATTENUATION_BIAS" }, + { 0x14B, "GPUREG_LIGHT0_ATTENUATION_SCALE" }, + + { 0x150, "GPUREG_LIGHT1_SPECULAR0" }, + { 0x151, "GPUREG_LIGHT1_SPECULAR1" }, + { 0x152, "GPUREG_LIGHT1_DIFFUSE" }, + { 0x153, "GPUREG_LIGHT1_AMBIENT" }, + { 0x154, "GPUREG_LIGHT1_XY" }, + { 0x155, "GPUREG_LIGHT1_Z" }, + { 0x156, "GPUREG_LIGHT1_SPOTDIR_XY" }, + { 0x157, "GPUREG_LIGHT1_SPOTDIR_Z" }, + + { 0x159, "GPUREG_LIGHT1_CONFIG" }, + { 0x15A, "GPUREG_LIGHT1_ATTENUATION_BIAS" }, + { 0x15B, "GPUREG_LIGHT1_ATTENUATION_SCALE" }, + + { 0x160, "GPUREG_LIGHT2_SPECULAR0" }, + { 0x161, "GPUREG_LIGHT2_SPECULAR1" }, + { 0x162, "GPUREG_LIGHT2_DIFFUSE" }, + { 0x163, "GPUREG_LIGHT2_AMBIENT" }, + { 0x164, "GPUREG_LIGHT2_XY" }, + { 0x165, "GPUREG_LIGHT2_Z" 
}, + { 0x166, "GPUREG_LIGHT2_SPOTDIR_XY" }, + { 0x167, "GPUREG_LIGHT2_SPOTDIR_Z" }, + + { 0x169, "GPUREG_LIGHT2_CONFIG" }, + { 0x16A, "GPUREG_LIGHT2_ATTENUATION_BIAS" }, + { 0x16B, "GPUREG_LIGHT2_ATTENUATION_SCALE" }, + + { 0x170, "GPUREG_LIGHT3_SPECULAR0" }, + { 0x171, "GPUREG_LIGHT3_SPECULAR1" }, + { 0x172, "GPUREG_LIGHT3_DIFFUSE" }, + { 0x173, "GPUREG_LIGHT3_AMBIENT" }, + { 0x174, "GPUREG_LIGHT3_XY" }, + { 0x175, "GPUREG_LIGHT3_Z" }, + { 0x176, "GPUREG_LIGHT3_SPOTDIR_XY" }, + { 0x177, "GPUREG_LIGHT3_SPOTDIR_Z" }, + + { 0x179, "GPUREG_LIGHT3_CONFIG" }, + { 0x17A, "GPUREG_LIGHT3_ATTENUATION_BIAS" }, + { 0x17B, "GPUREG_LIGHT3_ATTENUATION_SCALE" }, + + { 0x180, "GPUREG_LIGHT4_SPECULAR0" }, + { 0x181, "GPUREG_LIGHT4_SPECULAR1" }, + { 0x182, "GPUREG_LIGHT4_DIFFUSE" }, + { 0x183, "GPUREG_LIGHT4_AMBIENT" }, + { 0x184, "GPUREG_LIGHT4_XY" }, + { 0x185, "GPUREG_LIGHT4_Z" }, + { 0x186, "GPUREG_LIGHT4_SPOTDIR_XY" }, + { 0x187, "GPUREG_LIGHT4_SPOTDIR_Z" }, + + { 0x189, "GPUREG_LIGHT4_CONFIG" }, + { 0x18A, "GPUREG_LIGHT4_ATTENUATION_BIAS" }, + { 0x18B, "GPUREG_LIGHT4_ATTENUATION_SCALE" }, + + { 0x190, "GPUREG_LIGHT5_SPECULAR0" }, + { 0x191, "GPUREG_LIGHT5_SPECULAR1" }, + { 0x192, "GPUREG_LIGHT5_DIFFUSE" }, + { 0x193, "GPUREG_LIGHT5_AMBIENT" }, + { 0x194, "GPUREG_LIGHT5_XY" }, + { 0x195, "GPUREG_LIGHT5_Z" }, + { 0x196, "GPUREG_LIGHT5_SPOTDIR_XY" }, + { 0x197, "GPUREG_LIGHT5_SPOTDIR_Z" }, + + { 0x199, "GPUREG_LIGHT5_CONFIG" }, + { 0x19A, "GPUREG_LIGHT5_ATTENUATION_BIAS" }, + { 0x19B, "GPUREG_LIGHT5_ATTENUATION_SCALE" }, + + { 0x1A0, "GPUREG_LIGHT6_SPECULAR0" }, + { 0x1A1, "GPUREG_LIGHT6_SPECULAR1" }, + { 0x1A2, "GPUREG_LIGHT6_DIFFUSE" }, + { 0x1A3, "GPUREG_LIGHT6_AMBIENT" }, + { 0x1A4, "GPUREG_LIGHT6_XY" }, + { 0x1A5, "GPUREG_LIGHT6_Z" }, + { 0x1A6, "GPUREG_LIGHT6_SPOTDIR_XY" }, + { 0x1A7, "GPUREG_LIGHT6_SPOTDIR_Z" }, + + { 0x1A9, "GPUREG_LIGHT6_CONFIG" }, + { 0x1AA, "GPUREG_LIGHT6_ATTENUATION_BIAS" }, + { 0x1AB, "GPUREG_LIGHT6_ATTENUATION_SCALE" }, + + { 0x1B0, 
"GPUREG_LIGHT7_SPECULAR0" }, + { 0x1B1, "GPUREG_LIGHT7_SPECULAR1" }, + { 0x1B2, "GPUREG_LIGHT7_DIFFUSE" }, + { 0x1B3, "GPUREG_LIGHT7_AMBIENT" }, + { 0x1B4, "GPUREG_LIGHT7_XY" }, + { 0x1B5, "GPUREG_LIGHT7_Z" }, + { 0x1B6, "GPUREG_LIGHT7_SPOTDIR_XY" }, + { 0x1B7, "GPUREG_LIGHT7_SPOTDIR_Z" }, + + { 0x1B9, "GPUREG_LIGHT7_CONFIG" }, + { 0x1BA, "GPUREG_LIGHT7_ATTENUATION_BIAS" }, + { 0x1BB, "GPUREG_LIGHT7_ATTENUATION_SCALE" }, + + { 0x1C0, "GPUREG_LIGHTING_AMBIENT" }, + + { 0x1C2, "GPUREG_LIGHTING_NUM_LIGHTS" }, + { 0x1C3, "GPUREG_LIGHTING_CONFIG0" }, + { 0x1C4, "GPUREG_LIGHTING_CONFIG1" }, + { 0x1C5, "GPUREG_LIGHTING_LUT_INDEX" }, + { 0x1C6, "GPUREG_LIGHTING_ENABLE1" }, + + { 0x1C8, "GPUREG_LIGHTING_LUT_DATA0" }, + { 0x1C9, "GPUREG_LIGHTING_LUT_DATA1" }, + { 0x1CA, "GPUREG_LIGHTING_LUT_DATA2" }, + { 0x1CB, "GPUREG_LIGHTING_LUT_DATA3" }, + { 0x1CC, "GPUREG_LIGHTING_LUT_DATA4" }, + { 0x1CD, "GPUREG_LIGHTING_LUT_DATA5" }, + { 0x1CE, "GPUREG_LIGHTING_LUT_DATA6" }, + { 0x1CF, "GPUREG_LIGHTING_LUT_DATA7" }, + { 0x1D0, "GPUREG_LIGHTING_LUTINPUT_ABS" }, + { 0x1D1, "GPUREG_LIGHTING_LUTINPUT_SELECT" }, + { 0x1D2, "GPUREG_LIGHTING_LUTINPUT_SCALE" }, + + { 0x1D9, "GPUREG_LIGHTING_LIGHT_PERMUTATION" }, + + { 0x200, "GPUREG_ATTRIBBUFFERS_LOC" }, + { 0x201, "GPUREG_ATTRIBBUFFERS_FORMAT_LOW" }, + { 0x202, "GPUREG_ATTRIBBUFFERS_FORMAT_HIGH" }, + { 0x203, "GPUREG_ATTRIBBUFFER0_OFFSET" }, + { 0x204, "GPUREG_ATTRIBBUFFER0_CONFIG1" }, + { 0x205, "GPUREG_ATTRIBBUFFER0_CONFIG2" }, + { 0x206, "GPUREG_ATTRIBBUFFER1_OFFSET" }, + { 0x207, "GPUREG_ATTRIBBUFFER1_CONFIG1" }, + { 0x208, "GPUREG_ATTRIBBUFFER1_CONFIG2" }, + { 0x209, "GPUREG_ATTRIBBUFFER2_OFFSET" }, + { 0x20A, "GPUREG_ATTRIBBUFFER2_CONFIG1" }, + { 0x20B, "GPUREG_ATTRIBBUFFER2_CONFIG2" }, + { 0x20C, "GPUREG_ATTRIBBUFFER3_OFFSET" }, + { 0x20D, "GPUREG_ATTRIBBUFFER3_CONFIG1" }, + { 0x20E, "GPUREG_ATTRIBBUFFER3_CONFIG2" }, + { 0x20F, "GPUREG_ATTRIBBUFFER4_OFFSET" }, + { 0x210, "GPUREG_ATTRIBBUFFER4_CONFIG1" }, + { 0x211, 
"GPUREG_ATTRIBBUFFER4_CONFIG2" }, + { 0x212, "GPUREG_ATTRIBBUFFER5_OFFSET" }, + { 0x213, "GPUREG_ATTRIBBUFFER5_CONFIG1" }, + { 0x214, "GPUREG_ATTRIBBUFFER5_CONFIG2" }, + { 0x215, "GPUREG_ATTRIBBUFFER6_OFFSET" }, + { 0x216, "GPUREG_ATTRIBBUFFER6_CONFIG1" }, + { 0x217, "GPUREG_ATTRIBBUFFER6_CONFIG2" }, + { 0x218, "GPUREG_ATTRIBBUFFER7_OFFSET" }, + { 0x219, "GPUREG_ATTRIBBUFFER7_CONFIG1" }, + { 0x21A, "GPUREG_ATTRIBBUFFER7_CONFIG2" }, + { 0x21B, "GPUREG_ATTRIBBUFFER8_OFFSET" }, + { 0x21C, "GPUREG_ATTRIBBUFFER8_CONFIG1" }, + { 0x21D, "GPUREG_ATTRIBBUFFER8_CONFIG2" }, + { 0x21E, "GPUREG_ATTRIBBUFFER9_OFFSET" }, + { 0x21F, "GPUREG_ATTRIBBUFFER9_CONFIG1" }, + { 0x220, "GPUREG_ATTRIBBUFFER9_CONFIG2" }, + { 0x221, "GPUREG_ATTRIBBUFFER10_OFFSET" }, + { 0x222, "GPUREG_ATTRIBBUFFER10_CONFIG1" }, + { 0x223, "GPUREG_ATTRIBBUFFER10_CONFIG2" }, + { 0x224, "GPUREG_ATTRIBBUFFER11_OFFSET" }, + { 0x225, "GPUREG_ATTRIBBUFFER11_CONFIG1" }, + { 0x226, "GPUREG_ATTRIBBUFFER11_CONFIG2" }, + { 0x227, "GPUREG_INDEXBUFFER_CONFIG" }, + { 0x228, "GPUREG_NUMVERTICES" }, + { 0x229, "GPUREG_GEOSTAGE_CONFIG" }, + { 0x22A, "GPUREG_VERTEX_OFFSET" }, + + { 0x22D, "GPUREG_POST_VERTEX_CACHE_NUM" }, + { 0x22E, "GPUREG_DRAWARRAYS" }, + { 0x22F, "GPUREG_DRAWELEMENTS" }, + + { 0x231, "GPUREG_VTX_FUNC" }, + { 0x232, "GPUREG_FIXEDATTRIB_INDEX" }, + { 0x233, "GPUREG_FIXEDATTRIB_DATA0" }, + { 0x234, "GPUREG_FIXEDATTRIB_DATA1" }, + { 0x235, "GPUREG_FIXEDATTRIB_DATA2" }, + + { 0x238, "GPUREG_CMDBUF_SIZE0" }, + { 0x239, "GPUREG_CMDBUF_SIZE1" }, + { 0x23A, "GPUREG_CMDBUF_ADDR0" }, + { 0x23B, "GPUREG_CMDBUF_ADDR1" }, + { 0x23C, "GPUREG_CMDBUF_JUMP0" }, + { 0x23D, "GPUREG_CMDBUF_JUMP1" }, + + { 0x242, "GPUREG_VSH_NUM_ATTR" }, + + { 0x244, "GPUREG_VSH_COM_MODE" }, + { 0x245, "GPUREG_START_DRAW_FUNC0" }, + + { 0x24A, "GPUREG_VSH_OUTMAP_TOTAL1" }, + + { 0x251, "GPUREG_VSH_OUTMAP_TOTAL2" }, + { 0x252, "GPUREG_GSH_MISC0" }, + { 0x253, "GPUREG_GEOSTAGE_CONFIG2" }, + { 0x254, "GPUREG_GSH_MISC1" }, + + { 0x25E, 
"GPUREG_PRIMITIVE_CONFIG" }, + { 0x25F, "GPUREG_RESTART_PRIMITIVE" }, + + { 0x280, "GPUREG_GSH_BOOLUNIFORM" }, + { 0x281, "GPUREG_GSH_INTUNIFORM_I0" }, + { 0x282, "GPUREG_GSH_INTUNIFORM_I1" }, + { 0x283, "GPUREG_GSH_INTUNIFORM_I2" }, + { 0x284, "GPUREG_GSH_INTUNIFORM_I3" }, + + { 0x289, "GPUREG_GSH_INPUTBUFFER_CONFIG" }, + { 0x28A, "GPUREG_GSH_ENTRYPOINT" }, + { 0x28B, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW" }, + { 0x28C, "GPUREG_GSH_ATTRIBUTES_PERMUTATION_HIGH" }, + { 0x28D, "GPUREG_GSH_OUTMAP_MASK" }, + + { 0x28F, "GPUREG_GSH_CODETRANSFER_END" }, + { 0x290, "GPUREG_GSH_FLOATUNIFORM_INDEX" }, + { 0x291, "GPUREG_GSH_FLOATUNIFORM_DATA0" }, + { 0x292, "GPUREG_GSH_FLOATUNIFORM_DATA1" }, + { 0x293, "GPUREG_GSH_FLOATUNIFORM_DATA2" }, + { 0x294, "GPUREG_GSH_FLOATUNIFORM_DATA3" }, + { 0x295, "GPUREG_GSH_FLOATUNIFORM_DATA4" }, + { 0x296, "GPUREG_GSH_FLOATUNIFORM_DATA5" }, + { 0x297, "GPUREG_GSH_FLOATUNIFORM_DATA6" }, + { 0x298, "GPUREG_GSH_FLOATUNIFORM_DATA7" }, + + { 0x29B, "GPUREG_GSH_CODETRANSFER_INDEX" }, + { 0x29C, "GPUREG_GSH_CODETRANSFER_DATA0" }, + { 0x29D, "GPUREG_GSH_CODETRANSFER_DATA1" }, + { 0x29E, "GPUREG_GSH_CODETRANSFER_DATA2" }, + { 0x29F, "GPUREG_GSH_CODETRANSFER_DATA3" }, + { 0x2A0, "GPUREG_GSH_CODETRANSFER_DATA4" }, + { 0x2A1, "GPUREG_GSH_CODETRANSFER_DATA5" }, + { 0x2A2, "GPUREG_GSH_CODETRANSFER_DATA6" }, + { 0x2A3, "GPUREG_GSH_CODETRANSFER_DATA7" }, + + { 0x2A5, "GPUREG_GSH_OPDESCS_INDEX" }, + { 0x2A6, "GPUREG_GSH_OPDESCS_DATA0" }, + { 0x2A7, "GPUREG_GSH_OPDESCS_DATA1" }, + { 0x2A8, "GPUREG_GSH_OPDESCS_DATA2" }, + { 0x2A9, "GPUREG_GSH_OPDESCS_DATA3" }, + { 0x2AA, "GPUREG_GSH_OPDESCS_DATA4" }, + { 0x2AB, "GPUREG_GSH_OPDESCS_DATA5" }, + { 0x2AC, "GPUREG_GSH_OPDESCS_DATA6" }, + { 0x2AD, "GPUREG_GSH_OPDESCS_DATA7" }, + + { 0x2B0, "GPUREG_VSH_BOOLUNIFORM" }, + { 0x2B1, "GPUREG_VSH_INTUNIFORM_I0" }, + { 0x2B2, "GPUREG_VSH_INTUNIFORM_I1" }, + { 0x2B3, "GPUREG_VSH_INTUNIFORM_I2" }, + { 0x2B4, "GPUREG_VSH_INTUNIFORM_I3" }, + + { 0x2B9, 
"GPUREG_VSH_INPUTBUFFER_CONFIG" }, + { 0x2BA, "GPUREG_VSH_ENTRYPOINT" }, + { 0x2BB, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_LOW" }, + { 0x2BC, "GPUREG_VSH_ATTRIBUTES_PERMUTATION_HIGH" }, + { 0x2BD, "GPUREG_VSH_OUTMAP_MASK" }, + + { 0x2BF, "GPUREG_VSH_CODETRANSFER_END" }, + { 0x2C0, "GPUREG_VSH_FLOATUNIFORM_INDEX" }, + { 0x2C1, "GPUREG_VSH_FLOATUNIFORM_DATA0" }, + { 0x2C2, "GPUREG_VSH_FLOATUNIFORM_DATA1" }, + { 0x2C3, "GPUREG_VSH_FLOATUNIFORM_DATA2" }, + { 0x2C4, "GPUREG_VSH_FLOATUNIFORM_DATA3" }, + { 0x2C5, "GPUREG_VSH_FLOATUNIFORM_DATA4" }, + { 0x2C6, "GPUREG_VSH_FLOATUNIFORM_DATA5" }, + { 0x2C7, "GPUREG_VSH_FLOATUNIFORM_DATA6" }, + { 0x2C8, "GPUREG_VSH_FLOATUNIFORM_DATA7" }, + + { 0x2CB, "GPUREG_VSH_CODETRANSFER_INDEX" }, + { 0x2CC, "GPUREG_VSH_CODETRANSFER_DATA0" }, + { 0x2CD, "GPUREG_VSH_CODETRANSFER_DATA1" }, + { 0x2CE, "GPUREG_VSH_CODETRANSFER_DATA2" }, + { 0x2CF, "GPUREG_VSH_CODETRANSFER_DATA3" }, + { 0x2D0, "GPUREG_VSH_CODETRANSFER_DATA4" }, + { 0x2D1, "GPUREG_VSH_CODETRANSFER_DATA5" }, + { 0x2D2, "GPUREG_VSH_CODETRANSFER_DATA6" }, + { 0x2D3, "GPUREG_VSH_CODETRANSFER_DATA7" }, + + { 0x2D5, "GPUREG_VSH_OPDESCS_INDEX" }, + { 0x2D6, "GPUREG_VSH_OPDESCS_DATA0" }, + { 0x2D7, "GPUREG_VSH_OPDESCS_DATA1" }, + { 0x2D8, "GPUREG_VSH_OPDESCS_DATA2" }, + { 0x2D9, "GPUREG_VSH_OPDESCS_DATA3" }, + { 0x2DA, "GPUREG_VSH_OPDESCS_DATA4" }, + { 0x2DB, "GPUREG_VSH_OPDESCS_DATA5" }, + { 0x2DC, "GPUREG_VSH_OPDESCS_DATA6" }, + { 0x2DD, "GPUREG_VSH_OPDESCS_DATA7" }, +}; + std::string Regs::GetCommandName(int index) { - static std::unordered_map<u32, std::string> map; + static std::unordered_map<u32, const char*> map; if (map.empty()) { - #define ADD_FIELD(name) \ - map.insert({static_cast<u32>(PICA_REG_INDEX(name)), #name}); \ - /* TODO: change to Regs::name when VS2015 and other compilers support it */ \ - for (u32 i = PICA_REG_INDEX(name) + 1; i < PICA_REG_INDEX(name) + sizeof(Regs().name) / 4; ++i) \ - map.insert({i, #name + std::string("+") + std::to_string(i-PICA_REG_INDEX(name))}); 
\ - - ADD_FIELD(trigger_irq); - ADD_FIELD(cull_mode); - ADD_FIELD(viewport_size_x); - ADD_FIELD(viewport_size_y); - ADD_FIELD(viewport_depth_range); - ADD_FIELD(viewport_depth_far_plane); - ADD_FIELD(viewport_corner); - ADD_FIELD(texture0_enable); - ADD_FIELD(texture0); - ADD_FIELD(texture0_format); - ADD_FIELD(texture1); - ADD_FIELD(texture1_format); - ADD_FIELD(texture2); - ADD_FIELD(texture2_format); - ADD_FIELD(tev_stage0); - ADD_FIELD(tev_stage1); - ADD_FIELD(tev_stage2); - ADD_FIELD(tev_stage3); - ADD_FIELD(tev_combiner_buffer_input); - ADD_FIELD(tev_stage4); - ADD_FIELD(tev_stage5); - ADD_FIELD(tev_combiner_buffer_color); - ADD_FIELD(output_merger); - ADD_FIELD(framebuffer); - ADD_FIELD(vertex_attributes); - ADD_FIELD(index_array); - ADD_FIELD(num_vertices); - ADD_FIELD(vertex_offset); - ADD_FIELD(trigger_draw); - ADD_FIELD(trigger_draw_indexed); - ADD_FIELD(vs_default_attributes_setup); - ADD_FIELD(command_buffer); - ADD_FIELD(triangle_topology); - ADD_FIELD(restart_primitive); - ADD_FIELD(gs.bool_uniforms); - ADD_FIELD(gs.int_uniforms); - ADD_FIELD(gs.main_offset); - ADD_FIELD(gs.input_register_map); - ADD_FIELD(gs.uniform_setup); - ADD_FIELD(gs.program); - ADD_FIELD(gs.swizzle_patterns); - ADD_FIELD(vs.bool_uniforms); - ADD_FIELD(vs.int_uniforms); - ADD_FIELD(vs.main_offset); - ADD_FIELD(vs.input_register_map); - ADD_FIELD(vs.uniform_setup); - ADD_FIELD(vs.program); - ADD_FIELD(vs.swizzle_patterns); - -#undef ADD_FIELD + map.insert(begin(register_names), end(register_names)); } // Return empty string if no match is found diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 2f1b2dec4..2e0c33201 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -16,6 +16,8 @@ #include "common/vector_math.h" #include "common/logging/log.h" +#include "pica_types.h" + namespace Pica { // Returns index corresponding to the Regs member labeled by field_name @@ -239,7 +241,8 @@ struct Regs { TextureConfig texture0; INSERT_PADDING_WORDS(0x8); 
BitField<0, 4, TextureFormat> texture0_format; - INSERT_PADDING_WORDS(0x2); + BitField<0, 1, u32> fragment_lighting_enable; + INSERT_PADDING_WORDS(0x1); TextureConfig texture1; BitField<0, 4, TextureFormat> texture1_format; INSERT_PADDING_WORDS(0x2); @@ -641,7 +644,268 @@ struct Regs { } } - INSERT_PADDING_WORDS(0xe0); + INSERT_PADDING_WORDS(0x20); + + enum class LightingSampler { + Distribution0 = 0, + Distribution1 = 1, + Fresnel = 3, + ReflectBlue = 4, + ReflectGreen = 5, + ReflectRed = 6, + SpotlightAttenuation = 8, + DistanceAttenuation = 16, + }; + + /** + * Pica fragment lighting supports using different LUTs for each lighting component: + * Reflectance R, G, and B channels, distribution function for specular components 0 and 1, + * fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel + * (or whether a channel is enabled at all) is specified by various pre-defined lighting + * configurations. With configurations that require more LUTs, more cycles are required on HW to + * perform lighting computations. 
+ */ + enum class LightingConfig { + Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight + Config1 = 1, ///< Reflect Red, Fresnel, Spotlight + Config2 = 2, ///< Reflect Red, Distribution 0/1 + Config3 = 3, ///< Distribution 0/1, Fresnel + Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight + Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight + Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight + Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight + ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration + }; + + /// Selects which lighting components are affected by fresnel + enum class LightingFresnelSelector { + None = 0, ///< Fresnel is disabled + PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel + SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel + Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel + }; + + /// Factor used to scale the output of a lighting LUT + enum class LightingScale { + Scale1 = 0, ///< Scale is 1x + Scale2 = 1, ///< Scale is 2x + Scale4 = 2, ///< Scale is 4x + Scale8 = 3, ///< Scale is 8x + Scale1_4 = 6, ///< Scale is 0.25x + Scale1_2 = 7, ///< Scale is 0.5x + }; + + enum class LightingLutInput { + NH = 0, // Cosine of the angle between the normal and half-angle vectors + VH = 1, // Cosine of the angle between the view and half-angle vectors + NV = 2, // Cosine of the angle between the normal and the view vector + LN = 3, // Cosine of the angle between the light and the normal vectors + }; + + enum class LightingBumpMode : u32 { + None = 0, + NormalMap = 1, + TangentMap = 2, + }; + + union LightColor { + BitField< 0, 10, u32> b; + BitField<10, 10, u32> g; + BitField<20, 10, u32> r; + + Math::Vec3f ToVec3f() const { + // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component 
+ return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); + } + }; + + /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration + static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { + switch (sampler) { + case LightingSampler::Distribution0: + return (config != LightingConfig::Config1); + + case LightingSampler::Distribution1: + return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); + + case LightingSampler::Fresnel: + return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); + + case LightingSampler::ReflectRed: + return (config != LightingConfig::Config3); + + case LightingSampler::ReflectGreen: + case LightingSampler::ReflectBlue: + return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); + } + return false; + } + + struct { + struct LightSrc { + LightColor specular_0; // material.specular_0 * light.specular_0 + LightColor specular_1; // material.specular_1 * light.specular_1 + LightColor diffuse; // material.diffuse * light.diffuse + LightColor ambient; // material.ambient * light.ambient + + struct { + // Encoded as 16-bit floating point + union { + BitField< 0, 16, u32> x; + BitField<16, 16, u32> y; + }; + union { + BitField< 0, 16, u32> z; + }; + + INSERT_PADDING_WORDS(0x3); + + union { + BitField<0, 1, u32> directional; + BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 + }; + }; + + BitField<0, 20, u32> dist_atten_bias; + BitField<0, 20, u32> dist_atten_scale; + + INSERT_PADDING_WORDS(0x4); + }; + static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); + + LightSrc light[8]; + LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) + INSERT_PADDING_WORDS(0x1); + 
BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 + + union { + BitField< 2, 2, LightingFresnelSelector> fresnel_selector; + BitField< 4, 4, LightingConfig> config; + BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 + BitField<27, 1, u32> clamp_highlights; + BitField<28, 2, LightingBumpMode> bump_mode; + BitField<30, 1, u32> disable_bump_renorm; + }; + + union { + BitField<16, 1, u32> disable_lut_d0; + BitField<17, 1, u32> disable_lut_d1; + BitField<19, 1, u32> disable_lut_fr; + BitField<20, 1, u32> disable_lut_rr; + BitField<21, 1, u32> disable_lut_rg; + BitField<22, 1, u32> disable_lut_rb; + + // Each bit specifies whether distance attenuation should be applied for the + // corresponding light + + BitField<24, 1, u32> disable_dist_atten_light_0; + BitField<25, 1, u32> disable_dist_atten_light_1; + BitField<26, 1, u32> disable_dist_atten_light_2; + BitField<27, 1, u32> disable_dist_atten_light_3; + BitField<28, 1, u32> disable_dist_atten_light_4; + BitField<29, 1, u32> disable_dist_atten_light_5; + BitField<30, 1, u32> disable_dist_atten_light_6; + BitField<31, 1, u32> disable_dist_atten_light_7; + }; + + bool IsDistAttenDisabled(unsigned index) const { + const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1, + disable_dist_atten_light_2, disable_dist_atten_light_3, + disable_dist_atten_light_4, disable_dist_atten_light_5, + disable_dist_atten_light_6, disable_dist_atten_light_7 }; + return disable[index] != 0; + } + + union { + BitField<0, 8, u32> index; ///< Index at which to set data in the LUT + BitField<8, 5, u32> type; ///< Type of LUT for which to set data + } lut_config; + + BitField<0, 1, u32> disable; + INSERT_PADDING_WORDS(0x1); + + // When data is written to any of these registers, it gets written to the lookup table of + // the selected type at the selected index, specified above in the `lut_config` register. + // With each write, `lut_config.index` is incremented. 
It does not matter which of these + // registers is written to, the behavior will be the same. + u32 lut_data[8]; + + // These are used to specify if absolute (abs) value should be used for each LUT index. When + // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in + // the range of (0.0, 1.0). + union { + BitField< 1, 1, u32> disable_d0; + BitField< 5, 1, u32> disable_d1; + BitField< 9, 1, u32> disable_sp; + BitField<13, 1, u32> disable_fr; + BitField<17, 1, u32> disable_rb; + BitField<21, 1, u32> disable_rg; + BitField<25, 1, u32> disable_rr; + } abs_lut_input; + + union { + BitField< 0, 3, LightingLutInput> d0; + BitField< 4, 3, LightingLutInput> d1; + BitField< 8, 3, LightingLutInput> sp; + BitField<12, 3, LightingLutInput> fr; + BitField<16, 3, LightingLutInput> rb; + BitField<20, 3, LightingLutInput> rg; + BitField<24, 3, LightingLutInput> rr; + } lut_input; + + union { + BitField< 0, 3, LightingScale> d0; + BitField< 4, 3, LightingScale> d1; + BitField< 8, 3, LightingScale> sp; + BitField<12, 3, LightingScale> fr; + BitField<16, 3, LightingScale> rb; + BitField<20, 3, LightingScale> rg; + BitField<24, 3, LightingScale> rr; + + static float GetScale(LightingScale scale) { + switch (scale) { + case LightingScale::Scale1: + return 1.0f; + case LightingScale::Scale2: + return 2.0f; + case LightingScale::Scale4: + return 4.0f; + case LightingScale::Scale8: + return 8.0f; + case LightingScale::Scale1_4: + return 0.25f; + case LightingScale::Scale1_2: + return 0.5f; + } + return 0.0f; + } + } lut_scale; + + INSERT_PADDING_WORDS(0x6); + + union { + // There are 8 light enable "slots", corresponding to the total number of lights + // supported by Pica. For N enabled lights (specified by register 0x1c2, or 'num_lights' + // above), the first N slots below will be set to integers within the range of 0-7, + // corresponding to the actual light that is enabled for each slot. 
+ + BitField< 0, 3, u32> slot_0; + BitField< 4, 3, u32> slot_1; + BitField< 8, 3, u32> slot_2; + BitField<12, 3, u32> slot_3; + BitField<16, 3, u32> slot_4; + BitField<20, 3, u32> slot_5; + BitField<24, 3, u32> slot_6; + BitField<28, 3, u32> slot_7; + + unsigned GetNum(unsigned index) const { + const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; + return enable_slots[index]; + } + } light_enable; + } lighting; + + INSERT_PADDING_WORDS(0x26); enum class VertexAttributeFormat : u64 { BYTE = 0, @@ -825,7 +1089,16 @@ struct Regs { } } command_buffer; - INSERT_PADDING_WORDS(0x20); + INSERT_PADDING_WORDS(0x07); + + enum class GPUMode : u32 { + Drawing = 0, + Configuring = 1 + }; + + GPUMode gpu_mode; + + INSERT_PADDING_WORDS(0x18); enum class TriangleTopology : u32 { List = 0, @@ -990,6 +1263,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68); ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); +ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f); ASSERT_REG_POSITION(texture1, 0x91); ASSERT_REG_POSITION(texture1_format, 0x96); ASSERT_REG_POSITION(texture2, 0x99); @@ -1004,6 +1278,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8); ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); +ASSERT_REG_POSITION(lighting, 0x140); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); ASSERT_REG_POSITION(num_vertices, 0x228); @@ -1012,6 +1287,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); ASSERT_REG_POSITION(command_buffer, 0x238); +ASSERT_REG_POSITION(gpu_mode, 0x245); ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(restart_primitive, 0x25f); ASSERT_REG_POSITION(gs, 0x280); @@ -1026,157 +1302,10 @@ 
static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); -struct float24 { - static float24 FromFloat32(float val) { - float24 ret; - ret.value = val; - return ret; - } - - // 16 bit mantissa, 7 bit exponent, 1 bit sign - // TODO: No idea if this works as intended - static float24 FromRawFloat24(u32 hex) { - float24 ret; - if ((hex & 0xFFFFFF) == 0) { - ret.value = 0; - } else { - u32 mantissa = hex & 0xFFFF; - u32 exponent = (hex >> 16) & 0x7F; - u32 sign = hex >> 23; - ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); - if (sign) - ret.value = -ret.value; - } - return ret; - } - - static float24 Zero() { - return FromFloat32(0.f); - } - - // Not recommended for anything but logging - float ToFloat32() const { - return value; - } - - float24 operator * (const float24& flt) const { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - return Zero(); - return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); - } - - float24 operator / (const float24& flt) const { - return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); - } - - float24 operator + (const float24& flt) const { - return float24::FromFloat32(ToFloat32() + flt.ToFloat32()); - } - - float24 operator - (const float24& flt) const { - return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); - } - - float24& operator *= (const float24& flt) { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - *this = Zero(); - else value *= flt.ToFloat32(); - return *this; - } - - float24& operator /= (const float24& flt) { 
- value /= flt.ToFloat32(); - return *this; - } - - float24& operator += (const float24& flt) { - value += flt.ToFloat32(); - return *this; - } - - float24& operator -= (const float24& flt) { - value -= flt.ToFloat32(); - return *this; - } - - float24 operator - () const { - return float24::FromFloat32(-ToFloat32()); - } - - bool operator < (const float24& flt) const { - return ToFloat32() < flt.ToFloat32(); - } - - bool operator > (const float24& flt) const { - return ToFloat32() > flt.ToFloat32(); - } - - bool operator >= (const float24& flt) const { - return ToFloat32() >= flt.ToFloat32(); - } - - bool operator <= (const float24& flt) const { - return ToFloat32() <= flt.ToFloat32(); - } - - bool operator == (const float24& flt) const { - return ToFloat32() == flt.ToFloat32(); - } - - bool operator != (const float24& flt) const { - return ToFloat32() != flt.ToFloat32(); - } - -private: - // Stored as a regular float, merely for convenience - // TODO: Perform proper arithmetic on this! - float value; -}; -static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); - -/// Struct used to describe current Pica state -struct State { - /// Pica registers - Regs regs; - - /// Vertex shader memory - struct ShaderSetup { - struct { - // The float uniforms are accessed by the shader JIT using SSE instructions, and are - // therefore required to be 16-byte aligned. 
- Math::Vec4<float24> MEMORY_ALIGNED16(f[96]); - - std::array<bool, 16> b; - std::array<Math::Vec4<u8>, 4> i; - } uniforms; - - Math::Vec4<float24> default_attributes[16]; - - std::array<u32, 1024> program_code; - std::array<u32, 1024> swizzle_data; - }; - - ShaderSetup vs; - ShaderSetup gs; - - /// Current Pica command list - struct { - const u32* head_ptr; - const u32* current_ptr; - u32 length; - } cmd_list; -}; - /// Initialize Pica state void Init(); /// Shutdown Pica state void Shutdown(); -extern State g_state; ///< Current Pica state - } // namespace diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h new file mode 100644 index 000000000..c7616bc55 --- /dev/null +++ b/src/video_core/pica_state.h @@ -0,0 +1,60 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/pica.h" +#include "video_core/primitive_assembly.h" +#include "video_core/shader/shader.h" + +namespace Pica { + +/// Struct used to describe current Pica state +struct State { + /// Pica registers + Regs regs; + + Shader::ShaderSetup vs; + Shader::ShaderSetup gs; + + struct { + union LutEntry { + // Used for raw access + u32 raw; + + // LUT value, encoded as 12-bit fixed point, with 12 fraction bits + BitField< 0, 12, u32> value; + + // Used by HW for efficient interpolation, Citra does not use these + BitField<12, 12, u32> difference; + + float ToFloat() { + return static_cast<float>(value) / 4095.f; + } + }; + + std::array<std::array<LutEntry, 256>, 24> luts; + } lighting; + + /// Current Pica command list + struct { + const u32* head_ptr; + const u32* current_ptr; + u32 length; + } cmd_list; + + /// Struct used to describe immediate mode rendering state + struct ImmediateModeState { + Shader::InputVertex input; + // This is constructed with a dummy triangle topology + PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; + int attribute_id = 0; + + 
ImmediateModeState() : primitive_assembler(Regs::TriangleTopology::List) {} + } immediate; +}; + +extern State g_state; ///< Current Pica state + +} // namespace diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h new file mode 100644 index 000000000..ecf45654b --- /dev/null +++ b/src/video_core/pica_types.h @@ -0,0 +1,146 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstring> + +#include "common/common_types.h" + +namespace Pica { + +/** + * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision + * floating point. + * + * When decoding, format is as follows: + * - The first `M` bits are the mantissa + * - The next `E` bits are the exponent + * - The last bit is the sign bit + * + * @todo Verify on HW if this conversion is sufficiently accurate. + */ +template<unsigned M, unsigned E> +struct Float { +public: + static Float<M, E> FromFloat32(float val) { + Float<M, E> ret; + ret.value = val; + return ret; + } + + static Float<M, E> FromRaw(u32 hex) { + Float<M, E> res; + + const int width = M + E + 1; + const int bias = 128 - (1 << (E - 1)); + const int exponent = (hex >> M) & ((1 << E) - 1); + const unsigned mantissa = hex & ((1 << M) - 1); + + if (hex & ((1 << (width - 1)) - 1)) + hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23); + else + hex = ((hex >> (E + M)) << 31); + + std::memcpy(&res.value, &hex, sizeof(float)); + + return res; + } + + static Float<M, E> Zero() { + return FromFloat32(0.f); + } + + // Not recommended for anything but logging + float ToFloat32() const { + return value; + } + + Float<M, E> operator * (const Float<M, E>& flt) const { + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + return Zero(); + return Float<M, 
E>::FromFloat32(ToFloat32() * flt.ToFloat32()); + } + + Float<M, E> operator / (const Float<M, E>& flt) const { + return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); + } + + Float<M, E> operator + (const Float<M, E>& flt) const { + return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); + } + + Float<M, E> operator - (const Float<M, E>& flt) const { + return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); + } + + Float<M, E>& operator *= (const Float<M, E>& flt) { + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + *this = Zero(); + else value *= flt.ToFloat32(); + return *this; + } + + Float<M, E>& operator /= (const Float<M, E>& flt) { + value /= flt.ToFloat32(); + return *this; + } + + Float<M, E>& operator += (const Float<M, E>& flt) { + value += flt.ToFloat32(); + return *this; + } + + Float<M, E>& operator -= (const Float<M, E>& flt) { + value -= flt.ToFloat32(); + return *this; + } + + Float<M, E> operator - () const { + return Float<M, E>::FromFloat32(-ToFloat32()); + } + + bool operator < (const Float<M, E>& flt) const { + return ToFloat32() < flt.ToFloat32(); + } + + bool operator > (const Float<M, E>& flt) const { + return ToFloat32() > flt.ToFloat32(); + } + + bool operator >= (const Float<M, E>& flt) const { + return ToFloat32() >= flt.ToFloat32(); + } + + bool operator <= (const Float<M, E>& flt) const { + return ToFloat32() <= flt.ToFloat32(); + } + + bool operator == (const Float<M, E>& flt) const { + return ToFloat32() == flt.ToFloat32(); + } + + bool operator != (const Float<M, E>& flt) const { + return ToFloat32() != flt.ToFloat32(); + } + +private: + static const unsigned MASK = (1 << (M + E + 1)) - 1; + static const unsigned MANTISSA_MASK = (1 << M) - 1; + static const unsigned EXPONENT_MASK = (1 << E) - 1; + + // Stored as a regular float, merely for convenience + // TODO: Perform proper arithmetic on 
this! + float value; +}; + +using float24 = Float<16, 7>; +using float20 = Float<12, 7>; +using float16 = Float<10, 5>; + +} // namespace Pica diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 44a8dbfe9..0061690f1 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -39,13 +39,12 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandl buffer[buffer_index] = vtx; - if (topology == Regs::TriangleTopology::Strip) { - strip_ready |= (buffer_index == 1); + strip_ready |= (buffer_index == 1); + + if (topology == Regs::TriangleTopology::Strip) buffer_index = !buffer_index; - } else if (topology == Regs::TriangleTopology::Fan) { + else if (topology == Regs::TriangleTopology::Fan) buffer_index = 1; - strip_ready = true; - } break; default: @@ -54,6 +53,18 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandl } } +template<typename VertexType> +void PrimitiveAssembler<VertexType>::Reset() { + buffer_index = 0; + strip_ready = false; +} + +template<typename VertexType> +void PrimitiveAssembler<VertexType>::Reconfigure(Regs::TriangleTopology topology) { + Reset(); + this->topology = topology; +} + // explicitly instantiate use cases template struct PrimitiveAssembler<Shader::OutputVertex>; diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 52d0ec8ff..cc6e5fde5 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -30,6 +30,16 @@ struct PrimitiveAssembler { */ void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); + /** + * Resets the internal state of the PrimitiveAssembler. + */ + void Reset(); + + /** + * Reconfigures the PrimitiveAssembler to use a different triangle topology. 
+ */ + void Reconfigure(Regs::TriangleTopology topology); + private: Regs::TriangleTopology topology; diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index ecfdbc9e8..fd02aa652 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -15,6 +15,7 @@ #include "core/hw/gpu.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/rasterizer.h" #include "video_core/utils.h" #include "video_core/debug_utils/debug_utils.h" @@ -857,12 +858,12 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } } - // TODO: Does depth indeed only get written even if depth testing is enabled? + unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); + u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); + if (output_merger.depth_test_enable) { - unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); - u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); u32 ref_z = GetDepth(x >> 4, y >> 4); bool pass = false; @@ -906,11 +907,11 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, UpdateStencil(stencil_test.action_depth_fail); continue; } - - if (output_merger.depth_write_enable) - SetDepth(x >> 4, y >> 4, z); } + if (output_merger.depth_write_enable) + SetDepth(x >> 4, y >> 4, z); + // The stencil depth_pass action is executed even if depth testing is disabled if (stencil_action_enable) UpdateStencil(stencil_test.action_depth_pass); diff --git a/src/video_core/hwrasterizer_base.h b/src/video_core/rasterizer_interface.h index 54b8892fb..008c5827b 100644 --- a/src/video_core/hwrasterizer_base.h +++ b/src/video_core/rasterizer_interface.h @@ -12,10 +12,11 @@ struct OutputVertex; } } -class HWRasterizer { +namespace VideoCore { + 
+class RasterizerInterface { public: - virtual ~HWRasterizer() { - } + virtual ~RasterizerInterface() {} /// Initialize API-specific GPU objects virtual void InitObjects() = 0; @@ -32,14 +33,16 @@ public: virtual void DrawTriangles() = 0; /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer - virtual void CommitFramebuffer() = 0; + virtual void FlushFramebuffer() = 0; /// Notify rasterizer that the specified PICA register has been changed virtual void NotifyPicaRegisterChanged(u32 id) = 0; - /// Notify rasterizer that the specified 3DS memory region will be read from after this notification - virtual void NotifyPreRead(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory. + virtual void FlushRegion(PAddr addr, u32 size) = 0; - /// Notify rasterizer that a 3DS memory region has been changed - virtual void NotifyFlush(PAddr addr, u32 size) = 0; + /// Notify rasterizer that any caches of the specified region should be discarded and reloaded from 3DS memory. + virtual void InvalidateRegion(PAddr addr, u32 size) = 0; }; + +} diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp new file mode 100644 index 000000000..6467ff723 --- /dev/null +++ b/src/video_core/renderer_base.cpp @@ -0,0 +1,29 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <memory> + +#include "common/make_unique.h" + +#include "core/settings.h" + +#include "video_core/renderer_base.h" +#include "video_core/video_core.h" +#include "video_core/swrasterizer.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" + +void RendererBase::RefreshRasterizerSetting() { + bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; + if (rasterizer == nullptr || opengl_rasterizer_active != hw_renderer_enabled) { + opengl_rasterizer_active = hw_renderer_enabled; + + if (hw_renderer_enabled) { + rasterizer = Common::make_unique<RasterizerOpenGL>(); + } else { + rasterizer = Common::make_unique<VideoCore::SWRasterizer>(); + } + rasterizer->InitObjects(); + rasterizer->Reset(); + } +} diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 6587bcf27..f68091cc8 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -8,7 +8,7 @@ #include "common/common_types.h" -#include "video_core/hwrasterizer_base.h" +#include "video_core/rasterizer_interface.h" class EmuWindow; @@ -22,9 +22,6 @@ public: kFramebuffer_Texture }; - RendererBase() : m_current_fps(0), m_current_frame(0) { - } - virtual ~RendererBase() { } @@ -38,7 +35,7 @@ public: virtual void SetWindow(EmuWindow* window) = 0; /// Initialize the renderer - virtual void Init() = 0; + virtual bool Init() = 0; /// Shutdown the renderer virtual void ShutDown() = 0; @@ -46,18 +43,25 @@ public: // Getter/setter functions: // ------------------------ - f32 GetCurrentframe() const { + f32 GetCurrentFPS() const { return m_current_fps; } - int current_frame() const { + int GetCurrentFrame() const { return m_current_frame; } - std::unique_ptr<HWRasterizer> hw_rasterizer; + VideoCore::RasterizerInterface* Rasterizer() const { + return rasterizer.get(); + } + + void RefreshRasterizerSetting(); protected: - f32 m_current_fps; ///< Current framerate, should be set by the renderer - int m_current_frame; ///< Current frame, should be set by the 
renderer + std::unique_ptr<VideoCore::RasterizerInterface> rasterizer; + f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer + int m_current_frame = 0; ///< Current frame, should be set by the renderer +private: + bool opengl_rasterizer_active = false; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 822739088..b3dc6aa19 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -19,6 +19,7 @@ #include "core/hw/gpu.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/utils.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -36,7 +37,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { stage.GetAlphaMultiplier() == 1); } -RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr(0) { } +RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } RasterizerOpenGL::~RasterizerOpenGL() { } void RasterizerOpenGL::InitObjects() { @@ -75,6 +76,12 @@ void RasterizerOpenGL::InitObjects() { glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); + + glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); + SetShader(); // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation @@ -120,12 +127,26 @@ void RasterizerOpenGL::InitObjects() { glFramebufferTexture2D(GL_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); + for (size_t i = 0; i < lighting_lut.size(); ++i) { + lighting_lut[i].Create(); + state.lighting_lut[i].texture_1d = lighting_lut[i].handle; + + glActiveTexture(GL_TEXTURE3 + i); + glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d); + + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } + state.Apply(); + ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); } void RasterizerOpenGL::Reset() { SyncCullMode(); + SyncDepthModifiers(); SyncBlendEnabled(); SyncBlendFuncs(); SyncBlendColor(); @@ -135,15 +156,37 @@ void RasterizerOpenGL::Reset() { SetShader(); - res_cache.FullFlush(); + res_cache.InvalidateAll(); +} + +/** + * This is a helper function to resolve an issue with opposite quaternions being interpolated by + * OpenGL. See below for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore + * making Dot(-Q1, Q2) positive. + * + * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. 
This + * should be correct for nearly all cases, however a more correct implementation (but less trivial + * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions + * manually using two Lerps, and doing this correction before each Lerp. + */ +static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { + Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; + Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; + + return (Math::Dot(a, b) < 0.f); } void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) { - vertex_batch.emplace_back(v0); - vertex_batch.emplace_back(v1); - vertex_batch.emplace_back(v2); + vertex_batch.emplace_back(v0, false); + vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); + vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); } void RasterizerOpenGL::DrawTriangles() { @@ -155,6 +198,13 @@ void RasterizerOpenGL::DrawTriangles() { state.draw.shader_dirty = false; } + for (unsigned index = 0; index < lighting_lut.size(); index++) { + if (uniform_block_data.lut_dirty[index]) { + SyncLightingLUT(index); + uniform_block_data.lut_dirty[index] = false; + } + } + if (uniform_block_data.dirty) { glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); uniform_block_data.dirty = false; @@ -168,19 +218,17 @@ void RasterizerOpenGL::DrawTriangles() { // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 
cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; - res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size, true); - res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size, true); + res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); + res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true); } -void RasterizerOpenGL::CommitFramebuffer() { +void RasterizerOpenGL::FlushFramebuffer() { CommitColorBuffer(); CommitDepthBuffer(); } @@ -188,15 +236,18 @@ void RasterizerOpenGL::CommitFramebuffer() { void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { const auto& regs = Pica::g_state.regs; - if (!Settings::values.use_hw_renderer) - return; - switch(id) { // Culling case PICA_REG_INDEX(cull_mode): SyncCullMode(); break; + // Depth modifiers + case PICA_REG_INDEX(viewport_depth_range): + case PICA_REG_INDEX(viewport_depth_far_plane): + SyncDepthModifiers(); + break; + // Blending case PICA_REG_INDEX(output_merger.alphablend_enable): SyncBlendEnabled(); @@ -281,54 +332,203 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(tev_combiner_buffer_color): SyncCombinerColor(); break; + + // Fragment lighting specular 0 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10): + SyncLightSpecular0(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10): + SyncLightSpecular0(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10): + 
SyncLightSpecular0(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10): + SyncLightSpecular0(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10): + SyncLightSpecular0(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10): + SyncLightSpecular0(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10): + SyncLightSpecular0(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10): + SyncLightSpecular0(7); + break; + + // Fragment lighting specular 1 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10): + SyncLightSpecular1(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10): + SyncLightSpecular1(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10): + SyncLightSpecular1(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10): + SyncLightSpecular1(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10): + SyncLightSpecular1(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10): + SyncLightSpecular1(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10): + SyncLightSpecular1(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10): + SyncLightSpecular1(7); + break; + + // Fragment lighting diffuse color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10): + SyncLightDiffuse(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10): + SyncLightDiffuse(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10): + SyncLightDiffuse(2); + break; + case 
PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10): + SyncLightDiffuse(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10): + SyncLightDiffuse(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10): + SyncLightDiffuse(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10): + SyncLightDiffuse(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10): + SyncLightDiffuse(7); + break; + + // Fragment lighting ambient color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10): + SyncLightAmbient(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10): + SyncLightAmbient(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10): + SyncLightAmbient(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10): + SyncLightAmbient(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10): + SyncLightAmbient(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10): + SyncLightAmbient(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10): + SyncLightAmbient(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10): + SyncLightAmbient(7); + break; + + // Fragment lighting position + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): + SyncLightPosition(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10): + SyncLightPosition(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 
+ 2 * 0x10): + SyncLightPosition(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10): + SyncLightPosition(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10): + SyncLightPosition(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10): + SyncLightPosition(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10): + SyncLightPosition(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10): + SyncLightPosition(7); + break; + + // Fragment lighting global ambient color (emission + ambient * ambient) + case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0): + SyncGlobalAmbient(); + break; + + // Fragment lighting lookup tables + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): + { + auto& lut_config = regs.lighting.lut_config; + uniform_block_data.lut_dirty[lut_config.type / 4] = true; + break; + } + } } -void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { +void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { const auto& regs = Pica::g_state.regs; - if (!Settings::values.use_hw_renderer) - 
return; + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); - - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; // If source memory region overlaps 3DS framebuffers, commit them before the copy happens - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) CommitColorBuffer(); - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) CommitDepthBuffer(); } -void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) { +void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { const auto& regs = Pica::g_state.regs; - if (!Settings::values.use_hw_renderer) - return; - - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * 
regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) ReloadColorBuffer(); - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) ReloadDepthBuffer(); // Notify cache of flush in case the region touches a cached resource - res_cache.NotifyFlush(addr, size); + res_cache.InvalidateInRange(addr, size); } void RasterizerOpenGL::SamplerInfo::Create() { @@ -499,27 +699,48 @@ void RasterizerOpenGL::SetShader() { uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } + // Set the texture samplers to correspond to different lookup table texture units + GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } + current_shader = shader_cache.emplace(config, 
std::move(shader)).first->second.get(); unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); glUniformBlockBinding(current_shader->shader.handle, block_index, 0); - } - // Update uniforms - SyncAlphaTest(); - SyncCombinerColor(); - auto& tev_stages = Pica::g_state.regs.GetTevStages(); - for (int index = 0; index < tev_stages.size(); ++index) - SyncTevConstColor(index, tev_stages[index]); + // Update uniforms + SyncAlphaTest(); + SyncCombinerColor(); + auto& tev_stages = Pica::g_state.regs.GetTevStages(); + for (int index = 0; index < tev_stages.size(); ++index) + SyncTevConstColor(index, tev_stages[index]); + + SyncGlobalAmbient(); + for (int light_index = 0; light_index < 8; light_index++) { + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + } + } } void RasterizerOpenGL::SyncFramebuffer() { const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) || @@ -531,10 +752,10 @@ void RasterizerOpenGL::SyncFramebuffer() { bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || fb_size_changed; - bool color_fb_modified = last_fb_color_addr != cur_fb_color_addr || + bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr || color_fb_prop_changed; - bool depth_fb_modified = last_fb_depth_addr != cur_fb_depth_addr || + bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr || depth_fb_prop_changed; // 
Commit if framebuffer modified in any way @@ -574,13 +795,13 @@ void RasterizerOpenGL::SyncFramebuffer() { // Load buffer data again if fb modified in any way if (color_fb_modified) { - last_fb_color_addr = cur_fb_color_addr; + cached_fb_color_addr = new_fb_color_addr; ReloadColorBuffer(); } if (depth_fb_modified) { - last_fb_depth_addr = cur_fb_depth_addr; + cached_fb_depth_addr = new_fb_depth_addr; ReloadDepthBuffer(); } @@ -596,12 +817,12 @@ void RasterizerOpenGL::SyncCullMode() { case Pica::Regs::CullMode::KeepClockWise: state.cull.enabled = true; - state.cull.mode = GL_BACK; + state.cull.front_face = GL_CW; break; case Pica::Regs::CullMode::KeepCounterClockWise: state.cull.enabled = true; - state.cull.mode = GL_FRONT; + state.cull.front_face = GL_CCW; break; default: @@ -611,6 +832,15 @@ void RasterizerOpenGL::SyncCullMode() { } } +void RasterizerOpenGL::SyncDepthModifiers() { + float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; + + // TODO: Implement scale modifier + uniform_block_data.data.depth_offset = depth_offset; + uniform_block_data.dirty = true; +} + void RasterizerOpenGL::SyncBlendEnabled() { state.blend.enabled = (Pica::g_state.regs.output_merger.alphablend_enable == 1); } @@ -657,8 +887,10 @@ void RasterizerOpenGL::SyncStencilTest() { void RasterizerOpenGL::SyncDepthTest() { const auto& regs = Pica::g_state.regs; - state.depth.test_enabled = (regs.output_merger.depth_test_enable == 1); - state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func); + state.depth.test_enabled = regs.output_merger.depth_test_enable == 1 || + regs.output_merger.depth_write_enable == 1; + state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? 
+ PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; state.color_mask.red_enabled = regs.output_merger.red_enable; state.color_mask.green_enabled = regs.output_merger.green_enable; state.color_mask.blue_enabled = regs.output_merger.blue_enable; @@ -682,19 +914,87 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS } } +void RasterizerOpenGL::SyncGlobalAmbient() { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient); + if (color != uniform_block_data.data.lighting_global_ambient) { + uniform_block_data.data.lighting_global_ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { + std::array<GLvec4, 256> new_data; + + for (unsigned offset = 0; offset < new_data.size(); ++offset) { + new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); + new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat(); + new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat(); + new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat(); + } + + if (new_data != lighting_lut_data[lut_index]) { + lighting_lut_data[lut_index] = new_data; + glActiveTexture(GL_TEXTURE3 + lut_index); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); + } +} + +void RasterizerOpenGL::SyncLightSpecular0(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); + if (color != uniform_block_data.data.light_src[light_index].specular_0) { + uniform_block_data.data.light_src[light_index].specular_0 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightSpecular1(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); + if (color != 
uniform_block_data.data.light_src[light_index].specular_1) { + uniform_block_data.data.light_src[light_index].specular_1 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightDiffuse(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); + if (color != uniform_block_data.data.light_src[light_index].diffuse) { + uniform_block_data.data.light_src[light_index].diffuse = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightAmbient(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); + if (color != uniform_block_data.data.light_src[light_index].ambient) { + uniform_block_data.data.light_src[light_index].ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightPosition(int light_index) { + GLvec3 position = { + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; + + if (position != uniform_block_data.data.light_src[light_index].position) { + uniform_block_data.data.light_src[light_index].position = position; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncDrawState() { const auto& regs = Pica::g_state.regs; // Sync the viewport - GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; + GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; + GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; // OpenGL uses different y coordinates, so negate corner offset and flip origin // TODO: Ensure 
viewport_corner.x should not be negated or origin flipped // TODO: Use floating-point viewports for accuracy if supported - glViewport((GLsizei)static_cast<float>(regs.viewport_corner.x), - -(GLsizei)static_cast<float>(regs.viewport_corner.y) - + regs.framebuffer.GetHeight() - viewport_height, + glViewport((GLsizei)regs.viewport_corner.x, + (GLsizei)regs.viewport_corner.y, viewport_width, viewport_height); // Sync bound texture(s), upload if not cached @@ -717,7 +1017,7 @@ void RasterizerOpenGL::SyncDrawState() { MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); void RasterizerOpenGL::ReloadColorBuffer() { - u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); + u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); if (color_buffer == nullptr) return; @@ -733,7 +1033,7 @@ void RasterizerOpenGL::ReloadColorBuffer() { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_color_texture.width) * bytes_per_pixel; + u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel); @@ -752,13 +1052,11 @@ void RasterizerOpenGL::ReloadColorBuffer() { } void RasterizerOpenGL::ReloadDepthBuffer() { - PAddr depth_buffer_addr = Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress(); - - if (depth_buffer_addr == 0) + if (cached_fb_depth_addr == 0) return; // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil - u8* depth_buffer = Memory::GetPhysicalPointer(depth_buffer_addr); + u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); if (depth_buffer == nullptr) return; @@ -779,7 
+1077,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width); + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width); u8* pixel = depth_buffer + dst_offset; u32 depth_stencil = *(u32*)pixel; @@ -791,7 +1089,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp; u8* pixel = depth_buffer + dst_offset; memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel); @@ -821,8 +1119,8 @@ Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit"); MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); void RasterizerOpenGL::CommitColorBuffer() { - if (last_fb_color_addr != 0) { - u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr); + if (cached_fb_color_addr != 0) { + u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); if (color_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); @@ -846,7 +1144,7 @@ void RasterizerOpenGL::CommitColorBuffer() { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_pixel_index = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * 
fb_color_texture.width * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel); @@ -857,9 +1155,9 @@ void RasterizerOpenGL::CommitColorBuffer() { } void RasterizerOpenGL::CommitDepthBuffer() { - if (last_fb_depth_addr != 0) { + if (cached_fb_depth_addr != 0) { // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. - u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr); + u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); if (depth_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); @@ -888,7 +1186,7 @@ void RasterizerOpenGL::CommitDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width); + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width); u8* pixel = depth_buffer + dst_offset; u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index]; @@ -900,7 +1198,7 @@ void RasterizerOpenGL::CommitDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp; u8* pixel = depth_buffer + dst_offset; memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 5ba898189..fc85aa3ff 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -14,9 +14,11 @@ 
#include "common/hash.h" #include "video_core/pica.h" -#include "video_core/hwrasterizer_base.h" +#include "video_core/pica_state.h" +#include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/shader/shader_interpreter.h" /** @@ -71,6 +73,59 @@ struct PicaShaderConfig { regs.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; + // Fragment lighting + + res.lighting.enable = !regs.lighting.disable; + res.lighting.src_num = regs.lighting.num_lights + 1; + + for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { + unsigned num = regs.lighting.light_enable.GetNum(light_index); + const auto& light = regs.lighting.light[num]; + res.lighting.light[light_index].num = num; + res.lighting.light[light_index].directional = light.directional != 0; + res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); + res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); + } + + res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; + res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; + res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + + res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; + res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + res.lighting.lut_d1.scale = 
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + + res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; + res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + + res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; + res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + + res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; + res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; + res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + + res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; + res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; + res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + + res.lighting.config = regs.lighting.config; + res.lighting.fresnel_selector = regs.lighting.fresnel_selector; + res.lighting.bump_mode = regs.lighting.bump_mode; + res.lighting.bump_selector = regs.lighting.bump_selector; + res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; + res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; + return res; } @@ -86,9 +141,37 @@ struct PicaShaderConfig { return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; }; - Pica::Regs::CompareFunc alpha_test_func; + Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; - u8 combiner_buffer_input; + u8 
combiner_buffer_input = 0; + + struct { + struct { + unsigned num = 0; + bool directional = false; + bool two_sided_diffuse = false; + bool dist_atten_enable = false; + GLfloat dist_atten_scale = 0.0f; + GLfloat dist_atten_bias = 0.0f; + } light[8]; + + bool enable = false; + unsigned src_num = 0; + Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; + unsigned bump_selector = 0; + bool bump_renorm = false; + bool clamp_highlights = false; + + Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; + Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; + + struct { + bool enable = false; + bool abs_input = false; + Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; + float scale = 1.0f; + } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; }; namespace std { @@ -102,37 +185,22 @@ struct hash<PicaShaderConfig> { } // namespace std -class RasterizerOpenGL : public HWRasterizer { +class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: RasterizerOpenGL(); ~RasterizerOpenGL() override; - /// Initialize API-specific GPU objects void InitObjects() override; - - /// Reset the rasterizer, such as flushing all caches and updating all state void Reset() override; - - /// Queues the primitive formed by the given vertices for rendering void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) override; - - /// Draw the current batch of triangles void DrawTriangles() override; - - /// Commit the rasterizer's framebuffer contents immediately to the current 3DS memory framebuffer - void CommitFramebuffer() override; - - /// Notify rasterizer that the specified PICA register has been changed + void FlushFramebuffer() override; void NotifyPicaRegisterChanged(u32 id) override; - - /// Notify rasterizer that the specified 3DS memory region will be read from after this 
notification - void NotifyPreRead(PAddr addr, u32 size) override; - - /// Notify rasterizer that a 3DS memory region has been changed - void NotifyFlush(PAddr addr, u32 size) override; + void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; /// OpenGL shader generated for a given Pica register state struct PicaShader { @@ -182,7 +250,7 @@ private: /// Structure that the hardware rendered vertices are composed of struct HardwareVertex { - HardwareVertex(const Pica::Shader::OutputVertex& v) { + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { position[0] = v.pos.x.ToFloat32(); position[1] = v.pos.y.ToFloat32(); position[2] = v.pos.z.ToFloat32(); @@ -197,6 +265,19 @@ private: tex_coord1[1] = v.tc1.y.ToFloat32(); tex_coord2[0] = v.tc2.x.ToFloat32(); tex_coord2[1] = v.tc2.y.ToFloat32(); + normquat[0] = v.quat.x.ToFloat32(); + normquat[1] = v.quat.y.ToFloat32(); + normquat[2] = v.quat.z.ToFloat32(); + normquat[3] = v.quat.w.ToFloat32(); + view[0] = v.view.x.ToFloat32(); + view[1] = v.view.y.ToFloat32(); + view[2] = v.view.z.ToFloat32(); + + if (flip_quaternion) { + for (float& x : normquat) { + x = -x; + } + } } GLfloat position[4]; @@ -204,19 +285,31 @@ private: GLfloat tex_coord0[2]; GLfloat tex_coord1[2]; GLfloat tex_coord2[2]; + GLfloat normquat[4]; + GLfloat view[3]; + }; + + struct LightSrc { + alignas(16) GLvec3 specular_0; + alignas(16) GLvec3 specular_1; + alignas(16) GLvec3 diffuse; + alignas(16) GLvec3 ambient; + alignas(16) GLvec3 position; }; /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned struct UniformData { // A vec4 color for each of the six tev stages - std::array<GLfloat, 4> const_color[6]; - std::array<GLfloat, 4> tev_combiner_buffer_color; + GLvec4 const_color[6]; + GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; - INSERT_PADDING_BYTES(12); + GLfloat depth_offset; + alignas(16) GLvec3 lighting_global_ambient; + LightSrc 
light_src[8]; }; - static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec"); + static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Reconfigure the OpenGL color texture to use the given format and dimensions void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); @@ -233,6 +326,9 @@ private: /// Syncs the cull mode to match the PICA register void SyncCullMode(); + /// Syncs the depth scale and offset to match the PICA registers + void SyncDepthModifiers(); + /// Syncs the blend enabled status to match the PICA register void SyncBlendEnabled(); @@ -260,6 +356,27 @@ private: /// Syncs the TEV combiner color buffer to match the PICA register void SyncCombinerColor(); + /// Syncs the lighting global ambient color to match the PICA register + void SyncGlobalAmbient(); + + /// Syncs the lighting lookup tables + void SyncLightingLUT(unsigned index); + + /// Syncs the specified light's diffuse color to match the PICA register + void SyncLightDiffuse(int light_index); + + /// Syncs the specified light's ambient color to match the PICA register + void SyncLightAmbient(int light_index); + + /// Syncs the specified light's position to match the PICA register + void SyncLightPosition(int light_index); + + /// Syncs the specified light's specular 0 color to match the PICA register + void SyncLightSpecular0(int light_index); + + /// Syncs the specified light's specular 1 color to match the PICA register + void SyncLightSpecular1(int light_index); + /// Syncs the remaining OpenGL drawing state to match the current PICA state void 
SyncDrawState(); @@ -289,8 +406,8 @@ private: OpenGLState state; - PAddr last_fb_color_addr; - PAddr last_fb_depth_addr; + PAddr cached_fb_color_addr; + PAddr cached_fb_depth_addr; // Hardware rasterizer std::array<SamplerInfo, 3> texture_samplers; @@ -302,6 +419,7 @@ private: struct { UniformData data; + bool lut_dirty[6]; bool dirty; } uniform_block_data; @@ -309,4 +427,7 @@ private: OGLBuffer vertex_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; + + std::array<OGLTexture, 6> lighting_lut; + std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 10d4ab0b6..a9ad46fe0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -15,7 +15,7 @@ #include "video_core/renderer_opengl/pica_to_gl.h" RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FullFlush(); + InvalidateAll(); } MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); @@ -58,8 +58,7 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text } } -void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size, bool ignore_hash) { - // Flush any texture that falls in the flushed region +void RasterizerCacheOpenGL::InvalidateInRange(PAddr addr, u32 size, bool ignore_hash) { // TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound auto cache_upper_bound = texture_cache.upper_bound(addr + size); @@ -77,6 +76,6 @@ void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size, bool ignore_hash) } } -void RasterizerCacheOpenGL::FullFlush() { +void RasterizerCacheOpenGL::InvalidateAll() { texture_cache.clear(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 98a48ffbe..b69651427 
100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -23,11 +23,11 @@ public: LoadAndBindTexture(state, texture_unit, Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format)); } - /// Flush any cached resource that touches the flushed region - void NotifyFlush(PAddr addr, u32 size, bool ignore_hash = false); + /// Invalidate any cached resource intersecting the specified region. + void InvalidateInRange(PAddr addr, u32 size, bool ignore_hash = false); - /// Flush all cached OpenGL resources tracked by this cache manager - void FullFlush(); + /// Invalidate all cached OpenGL resources tracked by this cache manager + void InvalidateAll(); private: struct CachedTexture { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 498c506e7..ee4b54ab9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,12 +32,10 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, out += "primary_color"; break; case Source::PrimaryFragmentColor: - // HACK: Until we implement fragment lighting, use primary_color - out += "primary_color"; + out += "primary_fragment_color"; break; case Source::SecondaryFragmentColor: - // HACK: Until we implement fragment lighting, use zero - out += "vec4(0.0)"; + out += "secondary_fragment_color"; break; case Source::Texture0: out += "texture(tex[0], texcoord[0])"; @@ -320,25 +318,229 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi out += "next_combiner_buffer.a = last_tex_env_out.a;\n"; } +/// Writes the code to emulate fragment lighting +static void WriteLighting(std::string& out, const PicaShaderConfig& config) { + // Define lighting globals + out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec3 
light_vector = vec3(0.0);\n" + "vec3 refl_value = vec3(0.0);\n"; + + // Compute fragment normals + if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { + // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture + std::string bump_selector = std::to_string(config.lighting.bump_selector); + out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; + + // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result + if (config.lighting.bump_renorm) { + std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; + out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; + } + } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { + // Bump mapping is enabled using a tangent map + LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); + UNIMPLEMENTED(); + } else { + // No bump mapping - surface local normal is just a unit normal + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + } + + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace + out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; + + // Gets the index into the specified lookup table for specular lighting + auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { + const std::string half_angle = "normalize(normalize(view) + light_vector)"; + std::string index; + switch (input) { + case Regs::LightingLutInput::NH: + index = "dot(normal, " + half_angle + ")"; + break; + + case Regs::LightingLutInput::VH: + index = std::string("dot(normalize(view), " + half_angle + ")"); + break; + + case Regs::LightingLutInput::NV: + index = std::string("dot(normal, normalize(view))"); + break; + + case Regs::LightingLutInput::LN: + index = 
std::string("dot(light_vector, normal)"); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); + UNIMPLEMENTED(); + break; + } + + if (abs) { + // LUT index is in the range of (0.0, 1.0) + index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; + } else { + // LUT index is in the range of (-1.0, 1.0) + index = "clamp(" + index + ", -1.0, 1.0)"; + return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; + } + + return std::string(); + }; + + // Gets the lighting lookup table value given the specified sampler and index + auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) { + return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " + + lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]"); + }; + + // Write the code to emulate each enabled light + for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { + const auto& light_config = config.lighting.light[light_index]; + std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; + + // Compute light vector (directional or positional) + if (light_config.directional) + out += "light_vector = normalize(" + light_src + ".position);\n"; + else + out += "light_vector = normalize(" + light_src + ".position + view);\n"; + + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided + std::string dot_product = light_config.two_sided_diffuse ? 
"abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; + + // If enabled, compute distance attenuation value + std::string dist_atten = "1.0"; + if (light_config.dist_atten_enable) { + std::string scale = std::to_string(light_config.dist_atten_scale); + std::string bias = std::to_string(light_config.dist_atten_bias); + std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; + index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; + const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); + dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); + } + + // If enabled, clamp specular component if lighting result is negative + std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + + // Specular 0 component + std::string d0_lut_value = "1.0"; + if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { + // Lookup specular "distribution 0" LUT value + std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; + } + std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; + out += "refl_value.r = " + value + ";\n"; + } else 
{ + out += "refl_value.r = 1.0;\n"; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; + out += "refl_value.g = " + value + ";\n"; + } else { + out += "refl_value.g = refl_value.r;\n"; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; + out += "refl_value.b = " + value + ";\n"; + } else { + out += "refl_value.b = refl_value.r;\n"; + } + + // Specular 1 component + std::string d1_lut_value = "1.0"; + if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { + // Lookup specular "distribution 1" LUT value + std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; + } + std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; + + // Fresnel + if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, 
Pica::Regs::LightingSampler::Fresnel)) { + // Lookup fresnel LUT value + std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; + + // Enabled for diffuse lighting alpha component + if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + out += "diffuse_sum.a *= " + value + ";\n"; + + // Enabled for the specular lighting alpha component + if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + out += "specular_sum.a *= " + value + ";\n"; + } + + // Compute primary fragment color (diffuse lighting) function + out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; + + // Compute secondary fragment color (specular lighting) function + out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; + } + + // Sum final lighting result + out += "diffuse_sum.rgb += lighting_global_ambient;\n"; + out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n"; + out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; +} + std::string GenerateFragmentShader(const PicaShaderConfig& config) { std::string out = R"( #version 330 core #define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 +#define LIGHTING_LUT_SIZE 256 +#define FLOAT_255 (255.0 / 256.0) in vec4 primary_color; in vec2 texcoord[3]; +in vec4 normquat; +in vec3 view; out vec4 color; +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; +}; + layout (std140) 
uniform shader_data { vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; int alphatest_ref; + float depth_offset; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; }; uniform sampler2D tex[3]; +uniform sampler1D lut[6]; + +// Rotate the vector v by the quaternion q +vec3 quaternion_rotate(vec4 q, vec3 v) { + return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); +} void main() { +vec4 primary_fragment_color = vec4(0.0); +vec4 secondary_fragment_color = vec4(0.0); )"; // Do not do any sort of processing if it's obvious we're not going to pass the alpha test @@ -347,6 +549,9 @@ void main() { return out; } + if (config.lighting.enable) + WriteLighting(out, config); + out += "vec4 combiner_buffer = vec4(0.0);\n"; out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; out += "vec4 last_tex_env_out = vec4(0.0);\n"; @@ -360,29 +565,37 @@ void main() { out += ") discard;\n"; } - out += "color = last_tex_env_out;\n}"; + out += "color = last_tex_env_out;\n"; + out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; return out; } std::string GenerateVertexShader() { std::string out = "#version 330 core\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; out += R"( out vec4 primary_color; out vec2 texcoord[3]; +out vec4 normquat; +out vec3 view; void main() { primary_color = 
vert_color; texcoord[0] = vert_texcoord0; texcoord[1] = vert_texcoord1; texcoord[2] = vert_texcoord2; - gl_Position = vec4(vert_position.x, -vert_position.y, -vert_position.z, vert_position.w); + normquat = vert_normquat; + view = vert_view; + gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); } )"; diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 046aae14f..097242f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -14,6 +14,8 @@ enum Attributes { ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_NORMQUAT, + ATTRIBUTE_VIEW, }; /** diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index c44497fc3..08e4d0b54 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -11,6 +11,7 @@ OpenGLState::OpenGLState() { // These all match default OpenGL values cull.enabled = false; cull.mode = GL_BACK; + cull.front_face = GL_CCW; depth.test_enabled = false; depth.test_func = GL_LESS; @@ -47,6 +48,10 @@ OpenGLState::OpenGLState() { texture_unit.sampler = 0; } + for (auto& lut : lighting_lut) { + lut.texture_1d = 0; + } + draw.framebuffer = 0; draw.vertex_array = 0; draw.vertex_buffer = 0; @@ -67,6 +72,10 @@ void OpenGLState::Apply() { glCullFace(cull.mode); } + if (cull.front_face != cur_state.cull.front_face) { + glFrontFace(cull.front_face); + } + // Depth test if (depth.test_enabled != cur_state.depth.test_enabled) { if (depth.test_enabled) { @@ -165,6 +174,14 @@ void OpenGLState::Apply() { } } + // Lighting LUTs + for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { + if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { + glActiveTexture(GL_TEXTURE3 + i); + glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); + } + } + // Framebuffer if 
(draw.framebuffer != cur_state.draw.framebuffer) { glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 84b3d49bc..e848058d7 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -11,6 +11,7 @@ public: struct { bool enabled; // GL_CULL_FACE GLenum mode; // GL_CULL_FACE_MODE + GLenum front_face; // GL_FRONT_FACE } cull; struct { @@ -61,6 +62,10 @@ public: } texture_units[3]; struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } lighting_lut[6]; + + struct { GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 04c1d1a34..3d6c4e9e5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -10,6 +10,9 @@ #include "video_core/pica.h" +using GLvec3 = std::array<GLfloat, 3>; +using GLvec4 = std::array<GLfloat, 4>; + namespace PicaToGL { inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { @@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) { return stencil_op_table[(unsigned)action]; } -inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { +inline GLvec4 ColorRGBA8(const u32 color) { return { { (color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f, (color >> 16 & 0xFF) / 255.0f, @@ -183,4 +186,11 @@ inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { } }; } +inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { + return { { color.r / 255.0f, + color.g / 255.0f, + color.b / 255.0f + } }; +} + } // namespace diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1420229cc..11c4d0daf 100644 --- 
a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -81,8 +81,8 @@ struct ScreenRectVertex { * The projection part of the matrix is trivial, hence these operations are represented * by a 3x2 matrix. */ -static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) { - std::array<GLfloat, 3*2> matrix; +static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { + std::array<GLfloat, 3 * 2> matrix; matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; @@ -93,7 +93,6 @@ static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const /// RendererOpenGL constructor RendererOpenGL::RendererOpenGL() { - hw_rasterizer.reset(new RasterizerOpenGL()); resolution_width = std::max(VideoCore::kScreenTopWidth, VideoCore::kScreenBottomWidth); resolution_height = VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight; } @@ -157,15 +156,7 @@ void RendererOpenGL::SwapBuffers() { profiler.BeginFrame(); - bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled; - if (Settings::values.use_hw_renderer != hw_renderer_enabled) { - // TODO: Save new setting value to config file for next startup - Settings::values.use_hw_renderer = hw_renderer_enabled; - - if (Settings::values.use_hw_renderer) { - hw_rasterizer->Reset(); - } - } + RefreshRasterizerSetting(); if (Pica::g_debug_context && Pica::g_debug_context->recorder) { Pica::g_debug_context->recorder->FrameFinished(); @@ -286,8 +277,6 @@ void RendererOpenGL::InitOpenGLObjects() { state.texture_units[0].texture_2d = 0; state.Apply(); - - hw_rasterizer->InitObjects(); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, @@ -405,8 +394,58 @@ void RendererOpenGL::SetWindow(EmuWindow* window) { render_window = window; } +static const char* GetSource(GLenum source) { +#define RET(s) case 
GL_DEBUG_SOURCE_##s: return #s + switch (source) { + RET(API); + RET(WINDOW_SYSTEM); + RET(SHADER_COMPILER); + RET(THIRD_PARTY); + RET(APPLICATION); + RET(OTHER); + default: + UNREACHABLE(); + } +#undef RET +} + +static const char* GetType(GLenum type) { +#define RET(t) case GL_DEBUG_TYPE_##t: return #t + switch (type) { + RET(ERROR); + RET(DEPRECATED_BEHAVIOR); + RET(UNDEFINED_BEHAVIOR); + RET(PORTABILITY); + RET(PERFORMANCE); + RET(OTHER); + RET(MARKER); + default: + UNREACHABLE(); + } +#undef RET +} + +static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, + const GLchar* message, const void* user_param) { + Log::Level level; + switch (severity) { + case GL_DEBUG_SEVERITY_HIGH: + level = Log::Level::Error; + break; + case GL_DEBUG_SEVERITY_MEDIUM: + level = Log::Level::Warning; + break; + case GL_DEBUG_SEVERITY_NOTIFICATION: + case GL_DEBUG_SEVERITY_LOW: + level = Log::Level::Debug; + break; + } + LOG_GENERIC(Log::Class::Render_OpenGL, level, "%s %s %d: %s", + GetSource(source), GetType(type), id, message); +} + /// Initialize the renderer -void RendererOpenGL::Init() { +bool RendererOpenGL::Init() { render_window->MakeCurrent(); // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders @@ -415,10 +454,23 @@ void RendererOpenGL::Init() { exit(-1); } + if (GLAD_GL_KHR_debug) { + glEnable(GL_DEBUG_OUTPUT); + glDebugMessageCallback(DebugHandler, nullptr); + } + LOG_INFO(Render_OpenGL, "GL_VERSION: %s", glGetString(GL_VERSION)); LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", glGetString(GL_VENDOR)); LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", glGetString(GL_RENDERER)); + if (!GLAD_GL_VERSION_3_3) { + return false; + } + InitOpenGLObjects(); + + RefreshRasterizerSetting(); + + return true; } /// Shutdown the renderer diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b42df7654..fe4d142a5 100644 --- 
a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -31,7 +31,7 @@ public: void SetWindow(EmuWindow* window) override; /// Initialize the renderer - void Init() override; + bool Init() override; /// Shutdown the renderer void ShutDown() override; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 59f54236b..5e8930476 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -14,6 +14,7 @@ #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/video_core.h" #include "shader.h" @@ -134,16 +135,18 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); } - LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", + LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " + "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); + ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), + ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); return ret; } -DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { +DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { UnitState<true> state; state.program_counter = 
config.main_offset; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 1c6fa592c..7af8f1fa1 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -37,17 +37,19 @@ struct OutputVertex { Math::Vec4<float24> color; Math::Vec2<float24> tc0; Math::Vec2<float24> tc1; - float24 pad[6]; + INSERT_PADDING_WORDS(2); + Math::Vec3<float24> view; + INSERT_PADDING_WORDS(1); Math::Vec2<float24> tc2; // Padding for optimal alignment - float24 pad2[4]; + INSERT_PADDING_WORDS(4); // Attributes used to store intermediate results // position after perspective divide Math::Vec3<float24> screenpos; - float24 pad3; + INSERT_PADDING_WORDS(1); // Linear interpolation // factor: 0=this, 1=vtx @@ -75,6 +77,22 @@ struct OutputVertex { static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); +/// Vertex shader memory +struct ShaderSetup { + struct { + // The float uniforms are accessed by the shader JIT using SSE instructions, and are + // therefore required to be 16-byte aligned. + alignas(16) Math::Vec4<float24> f[96]; + + std::array<bool, 16> b; + std::array<Math::Vec4<u8>, 4> i; + } uniforms; + + Math::Vec4<float24> default_attributes[16]; + + std::array<u32, 1024> program_code; + std::array<u32, 1024> swizzle_data; +}; // Helper structure used to keep track of data useful for inspection of shader emulation template<bool full_debugging> @@ -258,9 +276,9 @@ struct UnitState { struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore // required to be 16-byte aligned. 
- Math::Vec4<float24> MEMORY_ALIGNED16(input[16]); - Math::Vec4<float24> MEMORY_ALIGNED16(output[16]); - Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]); + alignas(16) Math::Vec4<float24> input[16]; + alignas(16) Math::Vec4<float24> output[16]; + alignas(16) Math::Vec4<float24> temporary[16]; } registers; static_assert(std::is_pod<Registers>::value, "Structure is not POD"); @@ -345,7 +363,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr * @param setup Setup object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ -DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); +DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); } // namespace Shader diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 7b0c20b74..79fcc56b9 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -7,6 +7,7 @@ #include <nihstro/shader_bytecode.h> #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" @@ -515,7 +516,8 @@ void RunInterpreter(UnitState<Debug>& state) { case OpCode::Id::JMPU: Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); - if (uniforms.b[instr.flow_control.bool_uniform_id]) { + + if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { state.program_counter = instr.flow_control.dest_offset - 1; } break; diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 00415e402..5083d7e54 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ 
b/src/video_core/shader/shader_jit_x64.cpp @@ -11,6 +11,8 @@ #include "shader.h" #include "shader_jit_x64.h" +#include "video_core/pica_state.h" + namespace Pica { namespace Shader { @@ -653,7 +655,7 @@ void JitCompiler::Compile_IF(Instruction instr) { FixupBranch b = J_CC(CC_Z, true); // Compile the code that corresponds to the condition evaluating as true - Compile_Block(instr.flow_control.dest_offset - 1); + Compile_Block(instr.flow_control.dest_offset); // If there isn't an "ELSE" condition, we are done here if (instr.flow_control.num_instructions == 0) { @@ -667,7 +669,7 @@ void JitCompiler::Compile_IF(Instruction instr) { // This code corresponds to the "ELSE" condition // Comple the code that corresponds to the condition evaluating as false - Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions - 1); + Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions); SetJumpTarget(b2); } @@ -691,7 +693,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) { auto loop_start = GetCodePtr(); - Compile_Block(instr.flow_control.dest_offset); + Compile_Block(instr.flow_control.dest_offset + 1); ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1 @@ -710,19 +712,21 @@ void JitCompiler::Compile_JMP(Instruction instr) { else UNREACHABLE(); - FixupBranch b = J_CC(CC_NZ, true); + bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && + (instr.flow_control.num_instructions & 1); + FixupBranch b = J_CC(inverted_condition ? 
CC_Z : CC_NZ, true); Compile_Block(instr.flow_control.dest_offset); SetJumpTarget(b); } -void JitCompiler::Compile_Block(unsigned stop) { +void JitCompiler::Compile_Block(unsigned end) { // Save current offset pointer unsigned* prev_offset_ptr = offset_ptr; unsigned offset = *prev_offset_ptr; - while (offset <= stop) + while (offset < end) Compile_NextInstr(&offset); // Restore current offset pointer diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 3afbceccf..5ad2d9606 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -61,7 +61,7 @@ public: void Compile_MAD(Instruction instr); private: - void Compile_Block(unsigned stop); + void Compile_Block(unsigned end); void Compile_NextInstr(unsigned* offset); void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); diff --git a/src/video_core/swrasterizer.cpp b/src/video_core/swrasterizer.cpp new file mode 100644 index 000000000..03df15b01 --- /dev/null +++ b/src/video_core/swrasterizer.cpp @@ -0,0 +1,16 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/clipper.h" +#include "video_core/swrasterizer.h" + +namespace VideoCore { + +void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0, + const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) { + Pica::Clipper::ProcessTriangle(v0, v1, v2); +} + +} diff --git a/src/video_core/swrasterizer.h b/src/video_core/swrasterizer.h new file mode 100644 index 000000000..9a9a76d7a --- /dev/null +++ b/src/video_core/swrasterizer.h @@ -0,0 +1,26 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" + +#include "video_core/rasterizer_interface.h" + +namespace VideoCore { + +class SWRasterizer : public RasterizerInterface { + void InitObjects() override {} + void Reset() override {} + void AddTriangle(const Pica::Shader::OutputVertex& v0, + const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) override; + void DrawTriangles() override {} + void FlushFramebuffer() override {} + void NotifyPicaRegisterChanged(u32 id) override {} + void FlushRegion(PAddr addr, u32 size) override {} + void InvalidateRegion(PAddr addr, u32 size) override {} +}; + +} diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index eaddda668..ee5e50df1 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -2,7 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <memory> + #include "common/emu_window.h" +#include "common/make_unique.h" #include "common/logging/log.h" #include "core/core.h" @@ -18,29 +21,33 @@ namespace VideoCore { -EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window -RendererBase* g_renderer = nullptr; ///< Renderer plugin +EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window +std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin std::atomic<bool> g_hw_renderer_enabled; std::atomic<bool> g_shader_jit_enabled; /// Initialize the video core -void Init(EmuWindow* emu_window) { +bool Init(EmuWindow* emu_window) { Pica::Init(); g_emu_window = emu_window; - g_renderer = new RendererOpenGL(); + g_renderer = Common::make_unique<RendererOpenGL>(); g_renderer->SetWindow(g_emu_window); - g_renderer->Init(); - - LOG_DEBUG(Render, "initialized OK"); + if (g_renderer->Init()) { + LOG_DEBUG(Render, "initialized OK"); + } else { + LOG_ERROR(Render, "initialization failed !"); + return false; + } + return true; } /// Shutdown the video core void Shutdown() { Pica::Shutdown(); - 
delete g_renderer; + g_renderer.reset(); LOG_DEBUG(Render, "shutdown OK"); } diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 2867bf03e..bca67fb8c 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -5,6 +5,7 @@ #pragma once #include <atomic> +#include <memory> class EmuWindow; class RendererBase; @@ -29,8 +30,8 @@ static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height // Video core renderer // --------------------- -extern RendererBase* g_renderer; ///< Renderer plugin -extern EmuWindow* g_emu_window; ///< Emu window +extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin +extern EmuWindow* g_emu_window; ///< Emu window // TODO: Wrap these in a user settings struct along with any other graphics settings (often set from qt ui) extern std::atomic<bool> g_hw_renderer_enabled; @@ -40,7 +41,7 @@ extern std::atomic<bool> g_shader_jit_enabled; void Start(); /// Initialize the video core -void Init(EmuWindow* emu_window); +bool Init(EmuWindow* emu_window); /// Shutdown the video core void Shutdown(); |