diff options
author | bunnei <bunneidev@gmail.com> | 2014-08-25 22:12:10 +0200 |
---|---|---|
committer | bunnei <bunneidev@gmail.com> | 2014-08-25 22:12:10 +0200 |
commit | 97fd8fc38d4f9c288779cddb06538860124c6263 (patch) | |
tree | bc99e0fceaae732f9c8d4831fcdb8f661b49ccb8 /src/video_core | |
parent | Merge pull request #75 from xsacha/qt5 (diff) | |
parent | Pica/Rasterizer: Clarify a TODO. (diff) | |
download | yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.gz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.bz2 yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.lz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.xz yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.tar.zst yuzu-97fd8fc38d4f9c288779cddb06538860124c6263.zip |
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 44 | ||||
-rw-r--r-- | src/video_core/command_processor.h | 13 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 522 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 66 | ||||
-rw-r--r-- | src/video_core/gpu_debugger.h | 63 | ||||
-rw-r--r-- | src/video_core/math.h | 233 | ||||
-rw-r--r-- | src/video_core/pica.h | 153 | ||||
-rw-r--r-- | src/video_core/primitive_assembly.cpp | 28 | ||||
-rw-r--r-- | src/video_core/primitive_assembly.h | 38 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 222 | ||||
-rw-r--r-- | src/video_core/vertex_shader.cpp | 51 | ||||
-rw-r--r-- | src/video_core/vertex_shader.h | 81 | ||||
-rw-r--r-- | src/video_core/video_core.vcxproj | 2 | ||||
-rw-r--r-- | src/video_core/video_core.vcxproj.filters | 15 |
15 files changed, 1308 insertions, 225 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 8e7b93acb..71a1b5ecc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -5,6 +5,7 @@ set(SRCS clipper.cpp utils.cpp vertex_shader.cpp video_core.cpp + debug_utils/debug_utils.cpp renderer_opengl/renderer_opengl.cpp) set(HEADERS clipper.h @@ -17,6 +18,7 @@ set(HEADERS clipper.h renderer_base.h vertex_shader.h video_core.h + debug_utils/debug_utils.h renderer_opengl/renderer_opengl.h) add_library(video_core STATIC ${SRCS} ${HEADERS}) diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 020a4da3f..9567a9849 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -2,12 +2,14 @@ // Licensed under GPLv2 // Refer to the license.txt file included. +#include "clipper.h" #include "command_processor.h" #include "math.h" #include "pica.h" #include "primitive_assembly.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" namespace Pica { @@ -23,15 +25,24 @@ static u32 uniform_write_buffer[4]; static u32 vs_binary_write_offset = 0; static u32 vs_swizzle_write_offset = 0; -static inline void WritePicaReg(u32 id, u32 value) { +static inline void WritePicaReg(u32 id, u32 value, u32 mask) { + + if (id >= registers.NumIds()) + return; + + // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value u32 old_value = registers[id]; - registers[id] = value; + registers[id] = (old_value & ~mask) | (value & mask); + + DebugUtils::OnPicaRegWrite(id, registers[id]); switch(id) { // It seems like these trigger vertex rendering case PICA_REG_INDEX(trigger_draw): case PICA_REG_INDEX(trigger_draw_indexed): { + DebugUtils::DumpTevStageConfig(registers.GetTevStages()); + const auto& attribute_config = registers.vertex_attributes; const u8* const base_address = Memory::GetPointer(attribute_config.GetBaseAddress()); @@ -68,6 +79,10 @@ static inline void WritePicaReg(u32 id, u32 value) { const u16* index_address_16 = (u16*)index_address_8; bool index_u16 = (bool)index_info.format; + DebugUtils::GeometryDumper geometry_dumper; + PrimitiveAssembler<VertexShader::OutputVertex> clipper_primitive_assembler(registers.triangle_topology.Value()); + PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(registers.triangle_topology.Value()); + for (int index = 0; index < registers.num_vertices; ++index) { int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; @@ -95,14 +110,28 @@ static inline void WritePicaReg(u32 id, u32 value) { input.attr[i][comp].ToFloat32()); } } + + // NOTE: When dumping geometry, we simply assume that the first input attribute + // corresponds to the position for now. + DebugUtils::GeometryDumper::Vertex dumped_vertex = { + input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32() + }; + using namespace std::placeholders; + dumping_primitive_assembler.SubmitVertex(dumped_vertex, + std::bind(&DebugUtils::GeometryDumper::AddTriangle, + &geometry_dumper, _1, _2, _3)); + + // Send to vertex shader VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); if (is_indexed) { // TODO: Add processed vertex to vertex cache! } - PrimitiveAssembly::SubmitVertex(output); + // Send to triangle clipper + clipper_primitive_assembler.SubmitVertex(output, Clipper::ProcessTriangle); } + geometry_dumper.Dump(); break; } @@ -207,14 +236,17 @@ static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { u32* read_pointer = (u32*)first_command_word; - // TODO: Take parameter mask into consideration! + const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) | + ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) | + ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) | + ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u); - WritePicaReg(header.cmd_id, *read_pointer); + WritePicaReg(header.cmd_id, *read_pointer, write_mask); read_pointer += 2; for (int i = 1; i < 1+header.extra_data_length; ++i) { u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); - WritePicaReg(cmd, *read_pointer); + WritePicaReg(cmd, *read_pointer, write_mask); ++read_pointer; } diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index 6b6241a25..955f9daec 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h @@ -17,11 +17,22 @@ union CommandHeader { u32 hex; BitField< 0, 16, u32> cmd_id; + + // parameter_mask: + // Mask applied to the input value to make it possible to update + // parts of a register without overwriting its other fields. + // first bit: 0x000000FF + // second bit: 0x0000FF00 + // third bit: 0x00FF0000 + // fourth bit: 0xFF000000 BitField<16, 4, u32> parameter_mask; + BitField<20, 11, u32> extra_data_length; + BitField<31, 1, u32> group_commands; }; -static_assert(std::is_standard_layout<CommandHeader>::value == true, "CommandHeader does not use standard layout"); +static_assert(std::is_standard_layout<CommandHeader>::value == true, + "CommandHeader does not use standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); void ProcessCommandList(const u32* list, u32 size); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp new file mode 100644 index 000000000..48e6dd182 --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -0,0 +1,522 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include <algorithm> +#include <map> +#include <fstream> +#include <mutex> +#include <string> + +#ifdef HAVE_PNG +#include <png.h> +#endif + +#include "common/file_util.h" + +#include "video_core/pica.h" + +#include "debug_utils.h" + +namespace Pica { + +namespace DebugUtils { + +void GeometryDumper::AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2) { + vertices.push_back(v0); + vertices.push_back(v1); + vertices.push_back(v2); + + int num_vertices = vertices.size(); + faces.push_back({ num_vertices-3, num_vertices-2, num_vertices-1 }); +} + +void GeometryDumper::Dump() { + // NOTE: Permanently enabling this just trashes the hard disk for no reason. + // Hence, this is currently disabled. + return; + + static int index = 0; + std::string filename = std::string("geometry_dump") + std::to_string(++index) + ".obj"; + + std::ofstream file(filename); + + for (const auto& vertex : vertices) { + file << "v " << vertex.pos[0] + << " " << vertex.pos[1] + << " " << vertex.pos[2] << std::endl; + } + + for (const Face& face : faces) { + file << "f " << 1+face.index[0] + << " " << 1+face.index[1] + << " " << 1+face.index[2] << std::endl; + } +} + +#pragma pack(1) +struct DVLBHeader { + enum : u32 { + MAGIC_WORD = 0x424C5644, // "DVLB" + }; + + u32 magic_word; + u32 num_programs; +// u32 dvle_offset_table[]; +}; +static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); + +struct DVLPHeader { + enum : u32 { + MAGIC_WORD = 0x504C5644, // "DVLP" + }; + + u32 magic_word; + u32 version; + u32 binary_offset; // relative to DVLP start + u32 binary_size_words; + u32 swizzle_patterns_offset; + u32 swizzle_patterns_num_entries; + u32 unk2; +}; +static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); + +struct DVLEHeader { + enum : u32 { + MAGIC_WORD = 0x454c5644, // "DVLE" + }; + + enum class ShaderType : u8 { + VERTEX = 0, + GEOMETRY = 1, + }; + + u32 magic_word; + u16 pad1; + ShaderType type; + u8 pad2; + u32 main_offset_words; // offset within binary blob + u32 endmain_offset_words; + u32 pad3; + u32 pad4; + u32 constant_table_offset; + u32 constant_table_size; // number of entries + u32 label_table_offset; + u32 label_table_size; + u32 output_register_table_offset; + u32 output_register_table_size; + u32 uniform_table_offset; + u32 uniform_table_size; + u32 symbol_table_offset; + u32 symbol_table_size; + +}; +static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); +#pragma pack() + +void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, + u32 main_offset, const Regs::VSOutputAttributes* output_attributes) +{ + // NOTE: Permanently enabling this just trashes hard disks for no reason. + // Hence, this is currently disabled. + return; + + struct StuffToWrite { + u8* pointer; + u32 size; + }; + std::vector<StuffToWrite> writing_queue; + u32 write_offset = 0; + + auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) { + writing_queue.push_back({pointer, size}); + u32 old_write_offset = write_offset; + write_offset += size; + return old_write_offset; + }; + + // First off, try to translate Pica state (one enum for output attribute type and component) + // into shbin format (separate type and component mask). + union OutputRegisterInfo { + enum Type : u64 { + POSITION = 0, + COLOR = 2, + TEXCOORD0 = 3, + TEXCOORD1 = 5, + TEXCOORD2 = 6, + }; + + BitField< 0, 64, u64> hex; + + BitField< 0, 16, Type> type; + BitField<16, 16, u64> id; + BitField<32, 4, u64> component_mask; + }; + + // This is put into a try-catch block to make sure we notice unknown configurations. + std::vector<OutputRegisterInfo> output_info_table; + for (int i = 0; i < 7; ++i) { + using OutputAttributes = Pica::Regs::VSOutputAttributes; + + // TODO: It's still unclear how the attribute components map to the register! + // Once we know that, this code probably will not make much sense anymore. + std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = { + { OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} }, + { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, + { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, + { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, + { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, + { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, + { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, + { OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} }, + { OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} }, + { OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} }, + { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, + { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, + { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, + { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } + }; + + for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ + output_attributes[i].map_x, + output_attributes[i].map_y, + output_attributes[i].map_z, + output_attributes[i].map_w }) { + if (semantic == OutputAttributes::INVALID) + continue; + + try { + OutputRegisterInfo::Type type = map.at(semantic).first; + u32 component_mask = map.at(semantic).second; + + auto it = std::find_if(output_info_table.begin(), output_info_table.end(), + [&i, &type](const OutputRegisterInfo& info) { + return info.id == i && info.type == type; + } + ); + + if (it == output_info_table.end()) { + output_info_table.push_back({}); + output_info_table.back().type = type; + output_info_table.back().component_mask = component_mask; + output_info_table.back().id = i; + } else { + it->component_mask = it->component_mask | component_mask; + } + } catch (const std::out_of_range& oor) { + _dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping"); + ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x", + (int)output_attributes[i].map_x.Value(), + (int)output_attributes[i].map_y.Value(), + (int)output_attributes[i].map_z.Value(), + (int)output_attributes[i].map_w.Value()); + } + } + } + + + struct { + DVLBHeader header; + u32 dvle_offset; + } dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE + + DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD }; + DVLEHeader dvle{ DVLEHeader::MAGIC_WORD }; + + QueueForWriting((u8*)&dvlb, sizeof(dvlb)); + u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp)); + dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle)); + + // TODO: Reduce the amount of binary code written to relevant portions + dvlp.binary_offset = write_offset - dvlp_offset; + dvlp.binary_size_words = binary_size; + QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); + + dvlp.swizzle_patterns_offset = write_offset - dvlp_offset; + dvlp.swizzle_patterns_num_entries = swizzle_size; + u32 dummy = 0; + for (int i = 0; i < swizzle_size; ++i) { + QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); + QueueForWriting((u8*)&dummy, sizeof(dummy)); + } + + dvle.main_offset_words = main_offset; + dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; + dvle.output_register_table_size = output_info_table.size(); + QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo)); + + // TODO: Create a label table for "main" + + + // Write data to file + static int dump_index = 0; + std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); + std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); + + for (auto& chunk : writing_queue) { + file.write((char*)chunk.pointer, chunk.size); + } +} + +static std::unique_ptr<PicaTrace> pica_trace; +static std::mutex pica_trace_mutex; +static int is_pica_tracing = false; + +void StartPicaTracing() +{ + if (is_pica_tracing) { + ERROR_LOG(GPU, "StartPicaTracing called even though tracing already running!"); + return; + } + + pica_trace_mutex.lock(); + pica_trace = std::unique_ptr<PicaTrace>(new PicaTrace); + + is_pica_tracing = true; + pica_trace_mutex.unlock(); +} + +bool IsPicaTracing() +{ + return is_pica_tracing; +} + +void OnPicaRegWrite(u32 id, u32 value) +{ + // Double check for is_pica_tracing to avoid pointless locking overhead + if (!is_pica_tracing) + return; + + std::unique_lock<std::mutex> lock(pica_trace_mutex); + + if (!is_pica_tracing) + return; + + pica_trace->writes.push_back({id, value}); +} + +std::unique_ptr<PicaTrace> FinishPicaTracing() +{ + if (!is_pica_tracing) { + ERROR_LOG(GPU, "FinishPicaTracing called even though tracing already running!"); + return {}; + } + + // signalize that no further tracing should be performed + is_pica_tracing = false; + + // Wait until running tracing is finished + pica_trace_mutex.lock(); + std::unique_ptr<PicaTrace> ret(std::move(pica_trace)); + pica_trace_mutex.unlock(); + return std::move(ret); +} + +void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { + // NOTE: Permanently enabling this just trashes hard disks for no reason. + // Hence, this is currently disabled. + return; + +#ifndef HAVE_PNG + return; +#else + if (!data) + return; + + // Write data to file + static int dump_index = 0; + std::string filename = std::string("texture_dump") + std::to_string(++dump_index) + std::string(".png"); + u32 row_stride = texture_config.width * 3; + + u8* buf; + + char title[] = "Citra texture dump"; + char title_key[] = "Title"; + png_structp png_ptr = nullptr; + png_infop info_ptr = nullptr; + + // Open file for writing (binary mode) + File::IOFile fp(filename, "wb"); + + // Initialize write structure + png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + if (png_ptr == nullptr) { + ERROR_LOG(GPU, "Could not allocate write struct\n"); + goto finalise; + + } + + // Initialize info structure + info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == nullptr) { + ERROR_LOG(GPU, "Could not allocate info struct\n"); + goto finalise; + } + + // Setup Exception handling + if (setjmp(png_jmpbuf(png_ptr))) { + ERROR_LOG(GPU, "Error during png creation\n"); + goto finalise; + } + + png_init_io(png_ptr, fp.GetHandle()); + + // Write header (8 bit colour depth) + png_set_IHDR(png_ptr, info_ptr, texture_config.width, texture_config.height, + 8, PNG_COLOR_TYPE_RGB /*_ALPHA*/, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + png_text title_text; + title_text.compression = PNG_TEXT_COMPRESSION_NONE; + title_text.key = title_key; + title_text.text = title; + png_set_text(png_ptr, info_ptr, &title_text, 1); + + png_write_info(png_ptr, info_ptr); + + buf = new u8[row_stride * texture_config.height]; + for (int y = 0; y < texture_config.height; ++y) { + for (int x = 0; x < texture_config.width; ++x) { + // Cf. rasterizer code for an explanation of this algorithm. + int texel_index_within_tile = 0; + for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { + int sub_tile_width = 1 << block_size_index; + int sub_tile_height = 1 << block_size_index; + + int sub_tile_index = (x & sub_tile_width) << block_size_index; + sub_tile_index += 2 * ((y & sub_tile_height) << block_size_index); + texel_index_within_tile += sub_tile_index; + } + + const int block_width = 8; + const int block_height = 8; + + int coarse_x = (x / block_width) * block_width; + int coarse_y = (y / block_height) * block_height; + + u8* source_ptr = (u8*)data + coarse_x * block_height * 3 + coarse_y * row_stride + texel_index_within_tile * 3; + buf[3 * x + y * row_stride ] = source_ptr[2]; + buf[3 * x + y * row_stride + 1] = source_ptr[1]; + buf[3 * x + y * row_stride + 2] = source_ptr[0]; + } + } + + // Write image data + for (auto y = 0; y < texture_config.height; ++y) + { + u8* row_ptr = (u8*)buf + y * row_stride; + u8* ptr = row_ptr; + png_write_row(png_ptr, row_ptr); + } + + delete[] buf; + + // End write + png_write_end(png_ptr, nullptr); + +finalise: + if (info_ptr != nullptr) png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1); + if (png_ptr != nullptr) png_destroy_write_struct(&png_ptr, (png_infopp)nullptr); +#endif +} + +void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages) +{ + using Source = Pica::Regs::TevStageConfig::Source; + using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; + using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; + using Operation = Pica::Regs::TevStageConfig::Operation; + + std::string stage_info = "Tev setup:\n"; + for (int index = 0; index < stages.size(); ++index) { + const auto& tev_stage = stages[index]; + + const std::map<Source, std::string> source_map = { + { Source::PrimaryColor, "PrimaryColor" }, + { Source::Texture0, "Texture0" }, + { Source::Constant, "Constant" }, + { Source::Previous, "Previous" }, + }; + + const std::map<ColorModifier, std::string> color_modifier_map = { + { ColorModifier::SourceColor, { "%source.rgb" } } + }; + const std::map<AlphaModifier, std::string> alpha_modifier_map = { + { AlphaModifier::SourceAlpha, "%source.a" } + }; + + std::map<Operation, std::string> combiner_map = { + { Operation::Replace, "%source1" }, + { Operation::Modulate, "(%source1 * %source2) / 255" }, + }; + + auto ReplacePattern = + [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { + size_t start = input.find(pattern); + if (start == std::string::npos) + return input; + + std::string ret = input; + ret.replace(start, pattern.length(), replacement); + return ret; + }; + auto GetColorSourceStr = + [&source_map,&color_modifier_map,&ReplacePattern](const Source& src, const ColorModifier& modifier) { + auto src_it = source_map.find(src); + std::string src_str = "Unknown"; + if (src_it != source_map.end()) + src_str = src_it->second; + + auto modifier_it = color_modifier_map.find(modifier); + std::string modifier_str = "%source.????"; + if (modifier_it != color_modifier_map.end()) + modifier_str = modifier_it->second; + + return ReplacePattern(modifier_str, "%source", src_str); + }; + auto GetColorCombinerStr = + [&](const Regs::TevStageConfig& tev_stage) { + auto op_it = combiner_map.find(tev_stage.color_op); + std::string op_str = "Unknown op (%source1, %source2, %source3)"; + if (op_it != combiner_map.end()) + op_str = op_it->second; + + op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1)); + op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2)); + return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3)); + }; + auto GetAlphaSourceStr = + [&source_map,&alpha_modifier_map,&ReplacePattern](const Source& src, const AlphaModifier& modifier) { + auto src_it = source_map.find(src); + std::string src_str = "Unknown"; + if (src_it != source_map.end()) + src_str = src_it->second; + + auto modifier_it = alpha_modifier_map.find(modifier); + std::string modifier_str = "%source.????"; + if (modifier_it != alpha_modifier_map.end()) + modifier_str = modifier_it->second; + + return ReplacePattern(modifier_str, "%source", src_str); + }; + auto GetAlphaCombinerStr = + [&](const Regs::TevStageConfig& tev_stage) { + auto op_it = combiner_map.find(tev_stage.alpha_op); + std::string op_str = "Unknown op (%source1, %source2, %source3)"; + if (op_it != combiner_map.end()) + op_str = op_it->second; + + op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); + op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); + return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); + }; + + stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n"; + } + + DEBUG_LOG(GPU, "%s", stage_info.c_str()); +} + +} // namespace + +} // namespace diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h new file mode 100644 index 000000000..8b1499bf2 --- /dev/null +++ b/src/video_core/debug_utils/debug_utils.h @@ -0,0 +1,66 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <vector> + +#include "video_core/pica.h" + +namespace Pica { + +namespace DebugUtils { + +// Simple utility class for dumping geometry data to an OBJ file +class GeometryDumper { +public: + struct Vertex { + std::array<float,3> pos; + }; + + void AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2); + + void Dump(); + +private: + struct Face { + int index[3]; + }; + + std::vector<Vertex> vertices; + std::vector<Face> faces; +}; + +void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, + u32 main_offset, const Regs::VSOutputAttributes* output_attributes); + + +// Utility class to log Pica commands. +struct PicaTrace { + struct Write : public std::pair<u32,u32> { + Write(u32 id, u32 value) : std::pair<u32,u32>(id, value) {} + + u32& Id() { return first; } + const u32& Id() const { return first; } + + u32& Value() { return second; } + const u32& Value() const { return second; } + }; + std::vector<Write> writes; +}; + +void StartPicaTracing(); +bool IsPicaTracing(); +void OnPicaRegWrite(u32 id, u32 value); +std::unique_ptr<PicaTrace> FinishPicaTracing(); + +void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); + +void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); + +} // namespace + +} // namespace diff --git a/src/video_core/gpu_debugger.h b/src/video_core/gpu_debugger.h index 2ba873457..5a81fcfcb 100644 --- a/src/video_core/gpu_debugger.h +++ b/src/video_core/gpu_debugger.h @@ -18,19 +18,6 @@ class GraphicsDebugger { public: - // A few utility structs used to expose data - // A vector of commands represented by their raw byte sequence - struct PicaCommand : public std::vector<u32> - { - const Pica::CommandProcessor::CommandHeader& GetHeader() const - { - const u32& val = at(1); - return *(Pica::CommandProcessor::CommandHeader*)&val; - } - }; - - typedef std::vector<PicaCommand> PicaCommandList; - // Base class for all objects which need to be notified about GPU events class DebuggerObserver { @@ -55,16 +42,6 @@ public: ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value()); } - /** - * @param lst command list which triggered this call - * @param is_new true if the command list was called for the first time - * @todo figure out how to make sure called functions don't keep references around beyond their life time - */ - virtual void OnCommandListCalled(const PicaCommandList& lst, bool is_new) - { - ERROR_LOG(GSP, "Command list called: %d", (int)is_new); - } - protected: const GraphicsDebugger* GetDebugger() const { @@ -93,49 +70,12 @@ public: } ); } - void CommandListCalled(u32 address, u32* command_list, u32 size_in_words) - { - if (observers.empty()) - return; - - PicaCommandList cmdlist; - for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) - { - const Pica::CommandProcessor::CommandHeader& header = *(Pica::CommandProcessor::CommandHeader*)(&parse_pointer[1]); - - cmdlist.push_back(PicaCommand()); - auto& cmd = cmdlist.back(); - - size_t size = 2 + header.extra_data_length; - size = (size + 1) / 2 * 2; // align to 8 bytes - cmd.reserve(size); - std::copy(parse_pointer, parse_pointer + size, std::back_inserter(cmd)); - - parse_pointer += size; - } - - auto obj = std::pair<u32,PicaCommandList>(address, cmdlist); - auto it = std::find(command_lists.begin(), command_lists.end(), obj); - bool is_new = (it == command_lists.end()); - if (is_new) - command_lists.push_back(obj); - - ForEachObserver([&](DebuggerObserver* observer) { - observer->OnCommandListCalled(obj.second, is_new); - } ); - } - const GSP_GPU::Command& ReadGXCommandHistory(int index) const { // TODO: Is this thread-safe? return gx_command_history[index]; } - const std::vector<std::pair<u32,PicaCommandList>>& GetCommandLists() const - { - return command_lists; - } - void RegisterObserver(DebuggerObserver* observer) { // TODO: Check for duplicates @@ -158,7 +98,4 @@ private: std::vector<DebuggerObserver*> observers; std::vector<GSP_GPU::Command> gx_command_history; - - // vector of pairs of command lists and their storage address - std::vector<std::pair<u32,PicaCommandList>> command_lists; }; diff --git a/src/video_core/math.h b/src/video_core/math.h index 7030f2cfb..83ba81235 100644 --- a/src/video_core/math.h +++ b/src/video_core/math.h @@ -39,13 +39,19 @@ template<typename T> class Vec2; template<typename T> class Vec3; template<typename T> class Vec4; +template<typename T> +static inline Vec2<T> MakeVec(const T& x, const T& y); +template<typename T> +static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z); +template<typename T> +static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w); + template<typename T> class Vec2 { public: - struct { - T x,y; - }; + T x; + T y; T* AsArray() { return &x; } @@ -68,34 +74,34 @@ public: a[0] = x; a[1] = y; } - Vec2 operator +(const Vec2& other) const + Vec2<decltype(T{}+T{})> operator +(const Vec2& other) const { - return Vec2(x+other.x, y+other.y); + return MakeVec(x+other.x, y+other.y); } void operator += (const Vec2 &other) { x+=other.x; y+=other.y; } - Vec2 operator -(const Vec2& other) const + Vec2<decltype(T{}-T{})> operator -(const Vec2& other) const { - return Vec2(x-other.x, y-other.y); + return MakeVec(x-other.x, y-other.y); } void operator -= (const Vec2& other) { x-=other.x; y-=other.y; } - Vec2 operator -() const + Vec2<decltype(-T{})> operator -() const { - return Vec2(-x,-y); + return MakeVec(-x,-y); } - Vec2 operator * (const Vec2& other) const + Vec2<decltype(T{}*T{})> operator * (const Vec2& other) const { - return Vec2(x*other.x, y*other.y); + return MakeVec(x*other.x, y*other.y); } template<typename V> - Vec2 operator * (const V& f) const + Vec2<decltype(T{}*V{})> operator * (const V& f) const { - return Vec2(x*f,y*f); + return MakeVec(x*f,y*f); } template<typename V> void operator *= (const V& f) @@ -103,9 +109,9 @@ public: x*=f; y*=f; } template<typename V> - Vec2 operator / (const V& f) const + Vec2<decltype(T{}/V{})> operator / (const V& f) const { - return Vec2(x/f,y/f); + return MakeVec(x/f,y/f); } template<typename V> void operator /= (const V& f) @@ -152,20 +158,9 @@ public: const T& t() const { return y; } // swizzlers - create a subvector of specific components - Vec2 yx() const { return Vec2(y, x); } - Vec2 vu() const { return Vec2(y, x); } - Vec2 ts() const { return Vec2(y, x); } - - // Inserters to add new elements to effectively create larger vectors containing this Vec2 - Vec3<T> InsertBeforeX(const T& value) { - return Vec3<T>(value, x, y); - } - Vec3<T> InsertBeforeY(const T& value) { - return Vec3<T>(x, value, y); - } - Vec3<T> Append(const T& value) { - return Vec3<T>(x, y, value); - } + const Vec2 yx() const { return Vec2(y, x); } + const Vec2 vu() const { return Vec2(y, x); } + const Vec2 ts() const { return Vec2(y, x); } }; template<typename T, typename V> @@ -180,10 +175,9 @@ template<typename T> class Vec3 { public: - struct - { - T x,y,z; - }; + T x; + T y; + T z; T* AsArray() { return &x; } @@ -193,7 +187,7 @@ public: template<typename T2> Vec3<T2> Cast() const { - return Vec3<T2>((T2)x, (T2)y, (T2)z); + return MakeVec<T2>((T2)x, (T2)y, (T2)z); } // Only implemented for T=int and T=float @@ -202,7 +196,7 @@ public: static Vec3 AssignToAll(const T& f) { - return Vec3<T>(f, f, f); + return MakeVec(f, f, f); } void Write(T a[3]) @@ -210,34 +204,34 @@ public: a[0] = x; a[1] = y; a[2] = z; } - Vec3 operator +(const Vec3 &other) const + Vec3<decltype(T{}+T{})> operator +(const Vec3 &other) const { - return Vec3(x+other.x, y+other.y, z+other.z); + return MakeVec(x+other.x, y+other.y, z+other.z); } void operator += (const Vec3 &other) { x+=other.x; y+=other.y; z+=other.z; } - Vec3 operator -(const Vec3 &other) const + Vec3<decltype(T{}-T{})> operator -(const Vec3 &other) const { - return Vec3(x-other.x, y-other.y, z-other.z); + return MakeVec(x-other.x, y-other.y, z-other.z); } void operator -= (const Vec3 &other) { x-=other.x; y-=other.y; z-=other.z; } - Vec3 operator -() const + Vec3<decltype(-T{})> operator -() const { - return Vec3(-x,-y,-z); + return MakeVec(-x,-y,-z); } - Vec3 operator * (const Vec3 &other) const + Vec3<decltype(T{}*T{})> operator * (const Vec3 &other) const { - return Vec3(x*other.x, y*other.y, z*other.z); + return MakeVec(x*other.x, y*other.y, z*other.z); } template<typename V> - Vec3 operator * (const V& f) const + Vec3<decltype(T{}*V{})> operator * (const V& f) const { - return Vec3(x*f,y*f,z*f); + return MakeVec(x*f,y*f,z*f); } template<typename V> void operator *= (const V& f) @@ -245,9 +239,9 @@ public: x*=f; y*=f; z*=f; } template<typename V> - Vec3 operator / (const V& f) const + Vec3<decltype(T{}/V{})> operator / (const V& f) const { - return Vec3(x/f,y/f,z/f); + return MakeVec(x/f,y/f,z/f); } template<typename V> void operator /= (const V& f) @@ -310,7 +304,7 @@ public: // swizzlers - create a subvector of specific components // e.g. Vec2 uv() { return Vec2(x,y); } // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } +#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); } #define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ _DEFINE_SWIZZLER2(a, b, a##b); \ _DEFINE_SWIZZLER2(a, b, a2##b2); \ @@ -319,27 +313,13 @@ public: _DEFINE_SWIZZLER2(b, a, b##a); \ _DEFINE_SWIZZLER2(b, a, b2##a2); \ _DEFINE_SWIZZLER2(b, a, b3##a3); \ - _DEFINE_SWIZZLER2(b, a, b4##a4); + _DEFINE_SWIZZLER2(b, a, b4##a4) DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); #undef DEFINE_SWIZZLER2 #undef _DEFINE_SWIZZLER2 - - // Inserters to add new elements to effectively create larger vectors containing this Vec2 - Vec4<T> InsertBeforeX(const T& value) { - return Vec4<T>(value, x, y, z); - } - Vec4<T> InsertBeforeY(const T& value) { - return Vec4<T>(x, value, y, z); - } - Vec4<T> InsertBeforeZ(const T& value) { - return Vec4<T>(x, y, value, z); - } - Vec4<T> Append(const T& value) { - return Vec4<T>(x, y, z, value); - } }; template<typename T, typename V> @@ -348,16 +328,27 @@ Vec3<T> operator * (const V& f, const Vec3<T>& vec) return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); } +template<> +inline float Vec3<float>::Length() const { + return std::sqrt(x * x + y * y + z * z); +} + +template<> +inline Vec3<float> Vec3<float>::Normalized() const { + return *this / Length(); +} + + typedef Vec3<float> Vec3f; template<typename T> class Vec4 { public: - struct - { - T x,y,z,w; - }; + T x; + T y; + T z; + T w; T* AsArray() { return &x; } @@ -383,34 +374,34 @@ public: a[0] = x; a[1] = y; a[2] = z; a[3] = w; } - Vec4 operator +(const Vec4& other) const + Vec4<decltype(T{}+T{})> operator +(const Vec4& other) const { - return Vec4(x+other.x, y+other.y, z+other.z, w+other.w); + return MakeVec(x+other.x, y+other.y, z+other.z, w+other.w); } void operator += (const Vec4& other) { x+=other.x; y+=other.y; z+=other.z; w+=other.w; } - Vec4 operator -(const Vec4 &other) const + Vec4<decltype(T{}-T{})> operator -(const Vec4 &other) const { - return Vec4(x-other.x, y-other.y, z-other.z, w-other.w); + return MakeVec(x-other.x, y-other.y, z-other.z, w-other.w); } void operator -= (const Vec4 &other) { x-=other.x; y-=other.y; z-=other.z; w-=other.w; } - Vec4 operator -() const + Vec4<decltype(-T{})> operator -() const { - return Vec4(-x,-y,-z,-w); + return MakeVec(-x,-y,-z,-w); } - Vec4 operator * (const Vec4 &other) const + Vec4<decltype(T{}*T{})> operator * (const Vec4 &other) const { - return Vec4(x*other.x, y*other.y, z*other.z, w*other.w); + return MakeVec(x*other.x, y*other.y, z*other.z, w*other.w); } template<typename V> - Vec4 operator * (const V& f) const + Vec4<decltype(T{}*V{})> operator * (const V& f) const { - return Vec4(x*f,y*f,z*f,w*f); + return MakeVec(x*f,y*f,z*f,w*f); } template<typename V> void operator *= (const V& f) @@ -418,9 +409,9 @@ public: x*=f; y*=f; z*=f; w*=f; } template<typename V> - Vec4 operator / (const V& f) const + Vec4<decltype(T{}/V{})> operator / (const V& f) const { - return Vec4(x/f,y/f,z/f,w/f); + return MakeVec(x/f,y/f,z/f,w/f); } template<typename V> void operator /= (const V& f) @@ -469,12 +460,12 @@ public: // swizzlers - create a subvector of specific components // e.g. Vec2 uv() { return Vec2(x,y); } // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); } +#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); } #define DEFINE_SWIZZLER2(a, b, a2, b2) \ _DEFINE_SWIZZLER2(a, b, a##b); \ _DEFINE_SWIZZLER2(a, b, a2##b2); \ _DEFINE_SWIZZLER2(b, a, b##a); \ - _DEFINE_SWIZZLER2(b, a, b2##a2); + _DEFINE_SWIZZLER2(b, a, b2##a2) DEFINE_SWIZZLER2(x, y, r, g); DEFINE_SWIZZLER2(x, z, r, b); @@ -485,7 +476,7 @@ public: #undef DEFINE_SWIZZLER2 #undef _DEFINE_SWIZZLER2 -#define _DEFINE_SWIZZLER3(a, b, c, name) Vec3<T> name() const { return Vec3<T>(a, b, c); } +#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); } #define DEFINE_SWIZZLER3(a, b, c, a2, b2, c2) \ _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ @@ -498,7 +489,7 @@ public: _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ - _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2); + _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2) DEFINE_SWIZZLER3(x, y, z, r, g, b); DEFINE_SWIZZLER3(x, y, w, r, g, a); @@ -510,69 +501,121 @@ public: template<typename T, typename V> -Vec4<T> operator * (const V& f, const Vec4<T>& vec) +Vec4<decltype(V{}*T{})> operator * (const V& f, const Vec4<T>& vec) { - return Vec4<T>(f*vec.x,f*vec.y,f*vec.z,f*vec.w); + return MakeVec(f*vec.x,f*vec.y,f*vec.z,f*vec.w); } typedef Vec4<float> Vec4f; template<typename T> -static inline T Dot(const Vec2<T>& a, const Vec2<T>& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec2<T>& a, const Vec2<T>& b) { return a.x*b.x + a.y*b.y; } template<typename T> -static inline T Dot(const Vec3<T>& a, const Vec3<T>& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec3<T>& a, const Vec3<T>& b) { return a.x*b.x + a.y*b.y + a.z*b.z; } template<typename T> -static inline T Dot(const Vec4<T>& a, const Vec4<T>& b) +static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec4<T>& a, const Vec4<T>& b) { return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; } template<typename T> -static inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b) +static inline Vec3<decltype(T{}*T{}-T{}*T{})> Cross(const Vec3<T>& a, const Vec3<T>& b) { - return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); + return MakeVec(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); } // linear interpolation via float: 0.0=begin, 1.0=end template<typename X> -static inline X Lerp(const X& begin, const X& end, const float t) +static inline decltype(X{}*float{}+X{}*float{}) Lerp(const X& begin, const X& end, const float t) { return begin*(1.f-t) + end*t; } // linear interpolation via int: 0=begin, base=end template<typename X, int base> -static inline X LerpInt(const X& begin, const X& end, const int t) +static inline decltype((X{}*int{}+X{}*int{}) / base) LerpInt(const X& begin, const X& end, const int t) { return (begin*(base-t) + end*t) / base; } // Utility vector factories template<typename T> -static inline Vec2<T> MakeVec2(const T& x, const T& y) +static inline Vec2<T> MakeVec(const T& x, const T& y) { return Vec2<T>{x, y}; } template<typename T> -static inline Vec3<T> MakeVec3(const T& x, const T& y, const T& z) +static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z) { return Vec3<T>{x, y, z}; } template<typename T> -static inline Vec4<T> MakeVec4(const T& x, const T& y, const T& z, const T& w) +static inline Vec4<T> MakeVec(const T& x, const T& y, const Vec2<T>& zw) +{ + return MakeVec(x, y, zw[0], zw[1]); +} + +template<typename T> +static inline Vec3<T> MakeVec(const Vec2<T>& xy, const T& z) +{ + return MakeVec(xy[0], xy[1], z); +} + +template<typename T> +static inline Vec3<T> MakeVec(const T& x, const Vec2<T>& yz) +{ + return MakeVec(x, yz[0], yz[1]); +} + +template<typename T> +static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w) { return Vec4<T>{x, y, z, w}; } +template<typename T> +static inline Vec4<T> MakeVec(const Vec2<T>& xy, const T& z, const T& w) +{ + return MakeVec(xy[0], xy[1], z, w); +} + +template<typename T> +static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yz, const T& w) +{ + return MakeVec(x, yz[0], yz[1], w); +} + +// NOTE: This has priority over "Vec2<Vec2<T>> MakeVec(const Vec2<T>& x, const Vec2<T>& y)". +// Even if someone wanted to use an odd object like Vec2<Vec2<T>>, the compiler would error +// out soon enough due to misuse of the returned structure. +template<typename T> +static inline Vec4<T> MakeVec(const Vec2<T>& xy, const Vec2<T>& zw) +{ + return MakeVec(xy[0], xy[1], zw[0], zw[1]); +} + +template<typename T> +static inline Vec4<T> MakeVec(const Vec3<T>& xyz, const T& w) +{ + return MakeVec(xyz[0], xyz[1], xyz[2], w); +} + +template<typename T> +static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yzw) +{ + return MakeVec(x, yzw[0], yzw[1], yzw[2]); +} + + } // namespace diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 640830144..cfdc9b934 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -4,6 +4,7 @@ #pragma once +#include <array> #include <cstddef> #include <initializer_list> #include <map> @@ -57,7 +58,7 @@ struct Regs { INSERT_PADDING_WORDS(0x1); - union { + union VSOutputAttributes { // Maps components of output vertex attributes to semantics enum Semantic : u32 { @@ -94,7 +95,137 @@ struct Regs { BitField<16, 16, u32> y; } viewport_corner; - INSERT_PADDING_WORDS(0xa7); + INSERT_PADDING_WORDS(0x17); + + struct TextureConfig { + INSERT_PADDING_WORDS(0x1); + + union { + BitField< 0, 16, u32> height; + BitField<16, 16, u32> width; + }; + + INSERT_PADDING_WORDS(0x2); + + u32 address; + + u32 GetPhysicalAddress() { + return DecodeAddressRegister(address) - Memory::FCRAM_PADDR + Memory::HEAP_GSP_VADDR; + } + + // texture1 and texture2 store the texture format directly after the address + // whereas texture0 inserts some additional flags inbetween. + // Hence, we store the format separately so that all other parameters can be described + // in a single structure. + }; + + enum class TextureFormat : u32 { + RGBA8 = 0, + RGB8 = 1, + RGBA5551 = 2, + RGB565 = 3, + RGBA4 = 4, + + // TODO: Support for the other formats is not implemented, yet. + // Seems like they are luminance formats and compressed textures. + }; + + BitField<0, 1, u32> texturing_enable; + TextureConfig texture0; + INSERT_PADDING_WORDS(0x8); + BitField<0, 4, TextureFormat> texture0_format; + + INSERT_PADDING_WORDS(0x31); + + // 0xc0-0xff: Texture Combiner (akin to glTexEnv) + struct TevStageConfig { + enum class Source : u32 { + PrimaryColor = 0x0, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, + // 0x7-0xc = primary color?? + Constant = 0xe, + Previous = 0xf, + }; + + enum class ColorModifier : u32 { + SourceColor = 0, + OneMinusSourceColor = 1, + SourceAlpha = 2, + OneMinusSourceAlpha = 3, + + // Other values seem to be non-standard extensions + }; + + enum class AlphaModifier : u32 { + SourceAlpha = 0, + OneMinusSourceAlpha = 1, + + // Other values seem to be non-standard extensions + }; + + enum class Operation : u32 { + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + }; + + union { + BitField< 0, 4, Source> color_source1; + BitField< 4, 4, Source> color_source2; + BitField< 8, 4, Source> color_source3; + BitField<16, 4, Source> alpha_source1; + BitField<20, 4, Source> alpha_source2; + BitField<24, 4, Source> alpha_source3; + }; + + union { + BitField< 0, 4, ColorModifier> color_modifier1; + BitField< 4, 4, ColorModifier> color_modifier2; + BitField< 8, 4, ColorModifier> color_modifier3; + BitField<12, 3, AlphaModifier> alpha_modifier1; + BitField<16, 3, AlphaModifier> alpha_modifier2; + BitField<20, 3, AlphaModifier> alpha_modifier3; + }; + + union { + BitField< 0, 4, Operation> color_op; + BitField<16, 4, Operation> alpha_op; + }; + + union { + BitField< 0, 8, u32> const_r; + BitField< 8, 8, u32> const_g; + BitField<16, 8, u32> const_b; + BitField<24, 8, u32> const_a; + }; + + INSERT_PADDING_WORDS(0x1); + }; + + TevStageConfig tev_stage0; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage1; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage2; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage3; + INSERT_PADDING_WORDS(0x13); + TevStageConfig tev_stage4; + INSERT_PADDING_WORDS(0x3); + TevStageConfig tev_stage5; + INSERT_PADDING_WORDS(0x13); + + const std::array<Regs::TevStageConfig,6> GetTevStages() const { + return { tev_stage0, tev_stage1, + tev_stage2, tev_stage3, + tev_stage4, tev_stage5 }; + }; struct { enum ColorFormat : u32 { @@ -403,6 +534,15 @@ struct Regs { ADD_FIELD(viewport_depth_range); ADD_FIELD(viewport_depth_far_plane); ADD_FIELD(viewport_corner); + ADD_FIELD(texturing_enable); + ADD_FIELD(texture0); + ADD_FIELD(texture0_format); + ADD_FIELD(tev_stage0); + ADD_FIELD(tev_stage1); + ADD_FIELD(tev_stage2); + ADD_FIELD(tev_stage3); + ADD_FIELD(tev_stage4); + ADD_FIELD(tev_stage5); ADD_FIELD(framebuffer); ADD_FIELD(vertex_attributes); ADD_FIELD(index_array); @@ -460,6 +600,15 @@ ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); +ASSERT_REG_POSITION(texturing_enable, 0x80); +ASSERT_REG_POSITION(texture0, 0x81); +ASSERT_REG_POSITION(texture0_format, 0x8e); +ASSERT_REG_POSITION(tev_stage0, 0xc0); +ASSERT_REG_POSITION(tev_stage1, 0xc8); +ASSERT_REG_POSITION(tev_stage2, 0xd0); +ASSERT_REG_POSITION(tev_stage3, 0xd8); +ASSERT_REG_POSITION(tev_stage4, 0xf0); +ASSERT_REG_POSITION(tev_stage5, 0xf8); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 2354ffb99..dabf2d1a3 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -2,21 +2,23 @@ // Licensed under GPLv2 // Refer to the license.txt file included. -#include "clipper.h" #include "pica.h" #include "primitive_assembly.h" #include "vertex_shader.h" -namespace Pica { +#include "video_core/debug_utils/debug_utils.h" -namespace PrimitiveAssembly { +namespace Pica { -static OutputVertex buffer[2]; -static int buffer_index = 0; // TODO: reset this on emulation restart +template<typename VertexType> +PrimitiveAssembler<VertexType>::PrimitiveAssembler(Regs::TriangleTopology topology) + : topology(topology), buffer_index(0) { +} -void SubmitVertex(OutputVertex& vtx) +template<typename VertexType> +void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) { - switch (registers.triangle_topology) { + switch (topology) { case Regs::TriangleTopology::List: case Regs::TriangleTopology::ListIndexed: if (buffer_index < 2) { @@ -24,7 +26,7 @@ void SubmitVertex(OutputVertex& vtx) } else { buffer_index = 0; - Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); + triangle_handler(buffer[0], buffer[1], vtx); } break; @@ -32,7 +34,7 @@ void SubmitVertex(OutputVertex& vtx) if (buffer_index == 2) { buffer_index = 0; - Clipper::ProcessTriangle(buffer[0], buffer[1], vtx); + triangle_handler(buffer[0], buffer[1], vtx); buffer[1] = vtx; } else { @@ -41,11 +43,15 @@ void SubmitVertex(OutputVertex& vtx) break; default: - ERROR_LOG(GPU, "Unknown triangle mode %x:", (int)registers.triangle_topology.Value()); + ERROR_LOG(GPU, "Unknown triangle topology %x:", (int)topology); break; } } -} // namespace +// explicitly instantiate use cases +template +struct PrimitiveAssembler<VertexShader::OutputVertex>; +template +struct PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex>; } // namespace diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h index 2a2b0c170..ea2e2f61e 100644 --- a/src/video_core/primitive_assembly.h +++ b/src/video_core/primitive_assembly.h @@ -4,18 +4,40 @@ #pragma once -namespace Pica { +#include <functional> -namespace VertexShader { - struct OutputVertex; -} +#include "video_core/pica.h" -namespace PrimitiveAssembly { +#include "video_core/vertex_shader.h" -using VertexShader::OutputVertex; +namespace Pica { -void SubmitVertex(OutputVertex& vtx); +/* + * Utility class to build triangles from a series of vertices, + * according to a given triangle topology. + */ +template<typename VertexType> +struct PrimitiveAssembler { + using TriangleHandler = std::function<void(VertexType& v0, + VertexType& v1, + VertexType& v2)>; + + PrimitiveAssembler(Regs::TriangleTopology topology); + + /* + * Queues a vertex, builds primitives from the vertex queue according to the given + * triangle topology, and calls triangle_handler for each generated primitive. + * NOTE: We could specify the triangle handler in the constructor, but this way we can + * keep event and handler code next to each other. + */ + void SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler); + +private: + Regs::TriangleTopology topology; + + int buffer_index; + VertexType buffer[2]; +}; -} // namespace } // namespace diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a7c1bab3e..cdfdb6215 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -11,6 +11,8 @@ #include "rasterizer.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" + namespace Pica { namespace Rasterizer { @@ -78,10 +80,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); - min_x = min_x & Fix12P4::IntMask(); - min_y = min_y & Fix12P4::IntMask(); - max_x = (max_x + Fix12P4::FracMask()) & Fix12P4::IntMask(); - max_y = (max_y + Fix12P4::FracMask()) & Fix12P4::IntMask(); + min_x &= Fix12P4::IntMask(); + min_y &= Fix12P4::IntMask(); + max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); + max_y = ((max_y + Fix12P4::FracMask()) & Fix12P4::IntMask()); // Triangle filling rules: Pixels on the right-sided edge or on flat bottom edges are not // drawn. Pixels on any other triangle border are drawn. This is implemented with three bias @@ -112,10 +114,10 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, const Math::Vec2<Fix12P4>& vtx2, const Math::Vec2<Fix12P4>& vtx3) { - const auto vec1 = (vtx2.Cast<int>() - vtx1.Cast<int>()).Append(0); - const auto vec2 = (vtx3.Cast<int>() - vtx1.Cast<int>()).Append(0); + const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); + const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); // TODO: There is a very small chance this will overflow for sizeof(int) == 4 - return Cross(vec1, vec2).z; + return Math::Cross(vec1, vec2).z; }; int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); @@ -143,15 +145,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, // // The generalization to three vertices is straightforward in baricentric coordinates. auto GetInterpolatedAttribute = [&](float24 attr0, float24 attr1, float24 attr2) { - auto attr_over_w = Math::MakeVec3(attr0 / v0.pos.w, - attr1 / v1.pos.w, - attr2 / v2.pos.w); - auto w_inverse = Math::MakeVec3(float24::FromFloat32(1.f) / v0.pos.w, - float24::FromFloat32(1.f) / v1.pos.w, - float24::FromFloat32(1.f) / v2.pos.w); - auto baricentric_coordinates = Math::MakeVec3(float24::FromFloat32(w0), - float24::FromFloat32(w1), - float24::FromFloat32(w2)); + auto attr_over_w = Math::MakeVec(attr0 / v0.pos.w, + attr1 / v1.pos.w, + attr2 / v2.pos.w); + auto w_inverse = Math::MakeVec(float24::FromFloat32(1.f) / v0.pos.w, + float24::FromFloat32(1.f) / v1.pos.w, + float24::FromFloat32(1.f) / v2.pos.w); + auto baricentric_coordinates = Math::MakeVec(float24::FromFloat32(w0), + float24::FromFloat32(w1), + float24::FromFloat32(w2)); float24 interpolated_attr_over_w = Math::Dot(attr_over_w, baricentric_coordinates); float24 interpolated_w_inverse = Math::Dot(w_inverse, baricentric_coordinates); @@ -165,12 +167,196 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, (u8)(GetInterpolatedAttribute(v0.color.a(), v1.color.a(), v2.color.a()).ToFloat32() * 255) }; + Math::Vec4<u8> texture_color{}; + float24 u = GetInterpolatedAttribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); + float24 v = GetInterpolatedAttribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); + if (registers.texturing_enable) { + // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each + // of which is composed of four 2x2 subtiles each of which is composed of four texels. + // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. + // texels are laid out in a 2x2 subtile like this: + // 2 3 + // 0 1 + // + // The full 8x8 tile has the texels arranged like this: + // + // 42 43 46 47 58 59 62 63 + // 40 41 44 45 56 57 60 61 + // 34 35 38 39 50 51 54 55 + // 32 33 36 37 48 49 52 53 + // 10 11 14 15 26 27 30 31 + // 08 09 12 13 24 25 28 29 + // 02 03 06 07 18 19 22 23 + // 00 01 04 05 16 17 20 21 + + // TODO: This is currently hardcoded for RGB8 + u32* texture_data = (u32*)Memory::GetPointer(registers.texture0.GetPhysicalAddress()); + + // TODO(neobrain): Not sure if this swizzling pattern is used for all textures. + // To be flexible in case different but similar patterns are used, we keep this + // somewhat inefficient code around for now. + int s = (int)(u * float24::FromFloat32(registers.texture0.width)).ToFloat32(); + int t = (int)(v * float24::FromFloat32(registers.texture0.height)).ToFloat32(); + int texel_index_within_tile = 0; + for (int block_size_index = 0; block_size_index < 3; ++block_size_index) { + int sub_tile_width = 1 << block_size_index; + int sub_tile_height = 1 << block_size_index; + + int sub_tile_index = (s & sub_tile_width) << block_size_index; + sub_tile_index += 2 * ((t & sub_tile_height) << block_size_index); + texel_index_within_tile += sub_tile_index; + } + + const int block_width = 8; + const int block_height = 8; + + int coarse_s = (s / block_width) * block_width; + int coarse_t = (t / block_height) * block_height; + + const int row_stride = registers.texture0.width * 3; + u8* source_ptr = (u8*)texture_data + coarse_s * block_height * 3 + coarse_t * row_stride + texel_index_within_tile * 3; + texture_color.r() = source_ptr[2]; + texture_color.g() = source_ptr[1]; + texture_color.b() = source_ptr[0]; + texture_color.a() = 0xFF; + + DebugUtils::DumpTexture(registers.texture0, (u8*)texture_data); + } + + // Texture environment - consists of 6 stages of color and alpha combining. + // + // Color combiners take three input color values from some source (e.g. interpolated + // vertex color, texture color, previous stage, etc), perform some very simple + // operations on each of them (e.g. inversion) and then calculate the output color + // with some basic arithmetic. Alpha combiners can be configured separately but work + // analogously. + Math::Vec4<u8> combiner_output; + for (auto tev_stage : registers.GetTevStages()) { + using Source = Regs::TevStageConfig::Source; + using ColorModifier = Regs::TevStageConfig::ColorModifier; + using AlphaModifier = Regs::TevStageConfig::AlphaModifier; + using Operation = Regs::TevStageConfig::Operation; + + auto GetColorSource = [&](Source source) -> Math::Vec3<u8> { + switch (source) { + case Source::PrimaryColor: + return primary_color.rgb(); + + case Source::Texture0: + return texture_color.rgb(); + + case Source::Constant: + return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b}; + + case Source::Previous: + return combiner_output.rgb(); + + default: + ERROR_LOG(GPU, "Unknown color combiner source %d\n", (int)source); + return {}; + } + }; + + auto GetAlphaSource = [&](Source source) -> u8 { + switch (source) { + case Source::PrimaryColor: + return primary_color.a(); + + case Source::Texture0: + return texture_color.a(); + + case Source::Constant: + return tev_stage.const_a; + + case Source::Previous: + return combiner_output.a(); + + default: + ERROR_LOG(GPU, "Unknown alpha combiner source %d\n", (int)source); + return 0; + } + }; + + auto GetColorModifier = [](ColorModifier factor, const Math::Vec3<u8>& values) -> Math::Vec3<u8> { + switch (factor) + { + case ColorModifier::SourceColor: + return values; + default: + ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + return {}; + } + }; + + auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { + switch (factor) { + case AlphaModifier::SourceAlpha: + return value; + default: + ERROR_LOG(GPU, "Unknown color factor %d\n", (int)factor); + return 0; + } + }; + + auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { + switch (op) { + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return ((input[0] * input[1]) / 255).Cast<u8>(); + + default: + ERROR_LOG(GPU, "Unknown color combiner operation %d\n", (int)op); + return {}; + } + }; + + auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { + switch (op) { + case Operation::Replace: + return input[0]; + + case Operation::Modulate: + return input[0] * input[1] / 255; + + default: + ERROR_LOG(GPU, "Unknown alpha combiner operation %d\n", (int)op); + return 0; + } + }; + + // color combiner + // NOTE: Not sure if the alpha combiner might use the color output of the previous + // stage as input. Hence, we currently don't directly write the result to + // combiner_output.rgb(), but instead store it in a temporary variable until + // alpha combining has been done. + Math::Vec3<u8> color_result[3] = { + GetColorModifier(tev_stage.color_modifier1, GetColorSource(tev_stage.color_source1)), + GetColorModifier(tev_stage.color_modifier2, GetColorSource(tev_stage.color_source2)), + GetColorModifier(tev_stage.color_modifier3, GetColorSource(tev_stage.color_source3)) + }; + auto color_output = ColorCombine(tev_stage.color_op, color_result); + + // alpha combiner + std::array<u8,3> alpha_result = { + GetAlphaModifier(tev_stage.alpha_modifier1, GetAlphaSource(tev_stage.alpha_source1)), + GetAlphaModifier(tev_stage.alpha_modifier2, GetAlphaSource(tev_stage.alpha_source2)), + GetAlphaModifier(tev_stage.alpha_modifier3, GetAlphaSource(tev_stage.alpha_source3)) + }; + auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); + + combiner_output = Math::MakeVec(color_output, alpha_output); + } + + // TODO: Not sure if the multiplication by 65535 has already been taken care + // of when transforming to screen coordinates or not. u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + (float)v1.screenpos[2].ToFloat32() * w1 + - (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); // TODO: Shouldn't need to multiply by 65536? + (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); SetDepth(x >> 4, y >> 4, z); - DrawPixel(x >> 4, y >> 4, primary_color); + DrawPixel(x >> 4, y >> 4, combiner_output); } } } diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 93830a96a..db8244317 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -4,6 +4,7 @@ #include "pica.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" #include <core/mem_map.h> #include <common/file_util.h> @@ -50,6 +51,11 @@ struct VertexShaderState { }; u32 call_stack[8]; // TODO: What is the maximal call stack depth? u32* call_stack_pointer; + + struct { + u32 max_offset; // maximum program counter ever reached + u32 max_opdesc_id; // maximum swizzle pattern index ever used + } debug; }; static void ProcessShaderCode(VertexShaderState& state) { @@ -57,27 +63,34 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; + state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); - const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1] - : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x - : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x - : nullptr; - const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2] - : &state.temporary_registers[instr.common.src2-0x10].x; - // TODO: Unsure about the limit values - float24* dest = (instr.common.dest <= 0x1C) ? state.output_register_table[instr.common.dest] - : (instr.common.dest <= 0x3C) ? nullptr - : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4] + const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] + : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x + : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x : nullptr; + const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2.GetIndex()] + : &state.temporary_registers[instr.common.src2.GetIndex()].x; + float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] + : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] + : nullptr; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; + const bool negate_src1 = swizzle.negate; - const float24 src1[4] = { + float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], src1_[(int)swizzle.GetSelectorSrc1(1)], src1_[(int)swizzle.GetSelectorSrc1(2)], src1_[(int)swizzle.GetSelectorSrc1(3)], }; + if (negate_src1) { + src1[0] = src1[0] * float24::FromFloat32(-1); + src1[1] = src1[1] * float24::FromFloat32(-1); + src1[2] = src1[2] * float24::FromFloat32(-1); + src1[3] = src1[3] * float24::FromFloat32(-1); + } const float24 src2[4] = { src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], @@ -88,6 +101,7 @@ static void ProcessShaderCode(VertexShaderState& state) { switch (instr.opcode) { case Instruction::OpCode::ADD: { + state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -100,6 +114,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MUL: { + state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -113,6 +128,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { + state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); float24 dot = float24::FromFloat32(0.f); int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4; for (int i = 0; i < num_components; ++i) @@ -130,6 +146,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal case Instruction::OpCode::RCP: { + state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -145,6 +162,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal Square Root case Instruction::OpCode::RSQ: { + state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -159,6 +177,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MOV: { + state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -172,8 +191,9 @@ static void ProcessShaderCode(VertexShaderState& state) { if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { exit_loop = true; } else { - state.program_counter = &shader_memory[*state.call_stack_pointer--]; - *state.call_stack_pointer = VertexShaderState::INVALID_ADDRESS; + // Jump back to call stack position, invalidate call stack entry, move up call stack pointer + state.program_counter = &shader_memory[*state.call_stack_pointer]; + *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; } break; @@ -212,6 +232,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) const u32* main = &shader_memory[registers.vs_main_offset]; state.program_counter = (u32*)main; + state.debug.max_offset = 0; + state.debug.max_opdesc_id = 0; // Setup input register table const auto& attribute_register_map = registers.vs_input_register_map; @@ -255,6 +277,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.call_stack_pointer = &state.call_stack[0]; ProcessShaderCode(state); + DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, + state.debug.max_opdesc_id, registers.vs_main_offset, + registers.vs_output_attributes); DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index 1b71e367b..847fdc450 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -27,7 +27,6 @@ struct OutputVertex { Math::Vec4<float24> dummy; // quaternions (not implemented, yet) Math::Vec4<float24> color; Math::Vec2<float24> tc0; - float24 tc0_v; // Padding for optimal alignment float24 pad[14]; @@ -36,6 +35,7 @@ struct OutputVertex { // position after perspective divide Math::Vec3<float24> screenpos; + float24 pad2; // Linear interpolation // factor: 0=this, 1=vtx @@ -59,6 +59,7 @@ struct OutputVertex { } }; static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); +static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); union Instruction { enum class OpCode : u32 { @@ -117,9 +118,78 @@ union Instruction { // while "dest" addresses individual floats. union { BitField<0x00, 0x5, u32> operand_desc_id; - BitField<0x07, 0x5, u32> src2; - BitField<0x0c, 0x7, u32> src1; - BitField<0x13, 0x7, u32> dest; + + template<class BitFieldType> + struct SourceRegister : BitFieldType { + enum RegisterType { + Input, + Temporary, + FloatUniform + }; + + RegisterType GetRegisterType() const { + if (BitFieldType::Value() < 0x10) + return Input; + else if (BitFieldType::Value() < 0x20) + return Temporary; + else + return FloatUniform; + } + + int GetIndex() const { + if (GetRegisterType() == Input) + return BitFieldType::Value(); + else if (GetRegisterType() == Temporary) + return BitFieldType::Value() - 0x10; + else if (GetRegisterType() == FloatUniform) + return BitFieldType::Value() - 0x20; + } + + std::string GetRegisterName() const { + std::map<RegisterType, std::string> type = { + { Input, "i" }, + { Temporary, "t" }, + { FloatUniform, "f" }, + }; + return type[GetRegisterType()] + std::to_string(GetIndex()); + } + }; + + SourceRegister<BitField<0x07, 0x5, u32>> src2; + SourceRegister<BitField<0x0c, 0x7, u32>> src1; + + struct : BitField<0x15, 0x5, u32> + { + enum RegisterType { + Output, + Temporary, + Unknown + }; + RegisterType GetRegisterType() const { + if (Value() < 0x8) + return Output; + else if (Value() < 0x10) + return Unknown; + else + return Temporary; + } + int GetIndex() const { + if (GetRegisterType() == Output) + return Value(); + else if (GetRegisterType() == Temporary) + return Value() - 0x10; + else + return Value(); + } + std::string GetRegisterName() const { + std::map<RegisterType, std::string> type = { + { Output, "o" }, + { Temporary, "t" }, + { Unknown, "u" } + }; + return type[GetRegisterType()] + std::to_string(GetIndex()); + } + } dest; } common; // Format used for flow control instructions ("if") @@ -128,6 +198,7 @@ union Instruction { BitField<0x0a, 0xc, u32> offset_words; } flow_control; }; +static_assert(std::is_standard_layout<Instruction>::value, "Structure is not using standard layout!"); union SwizzlePattern { u32 hex; @@ -185,6 +256,8 @@ union SwizzlePattern { // Components of "dest" that should be written to: LSB=dest.w, MSB=dest.x BitField< 0, 4, u32> dest_mask; + BitField< 4, 1, u32> negate; // negates src1 + BitField< 5, 2, Selector> src1_selector_3; BitField< 7, 2, Selector> src1_selector_2; BitField< 9, 2, Selector> src1_selector_1; diff --git a/src/video_core/video_core.vcxproj b/src/video_core/video_core.vcxproj index 48d77cdc4..4e129fbe7 100644 --- a/src/video_core/video_core.vcxproj +++ b/src/video_core/video_core.vcxproj @@ -19,6 +19,7 @@ </ProjectConfiguration> </ItemGroup> <ItemGroup> + <ClCompile Include="debug_utils\debug_utils.cpp" /> <ClCompile Include="renderer_opengl\renderer_opengl.cpp" /> <ClCompile Include="clipper.cpp" /> <ClCompile Include="command_processor.cpp" /> @@ -40,6 +41,7 @@ <ClInclude Include="utils.h" /> <ClInclude Include="vertex_shader.h" /> <ClInclude Include="video_core.h" /> + <ClInclude Include="debug_utils\debug_utils.h" /> <ClInclude Include="renderer_opengl\renderer_opengl.h" /> </ItemGroup> <ItemGroup> diff --git a/src/video_core/video_core.vcxproj.filters b/src/video_core/video_core.vcxproj.filters index 31af4f1df..90541aca0 100644 --- a/src/video_core/video_core.vcxproj.filters +++ b/src/video_core/video_core.vcxproj.filters @@ -4,6 +4,9 @@ <Filter Include="renderer_opengl"> <UniqueIdentifier>{e0245557-dbd4-423e-9399-513d5e99f1e4}</UniqueIdentifier> </Filter> + <Filter Include="debug_utils"> + <UniqueIdentifier>{0ac498e6-bbd8-46e3-9d5f-e816546ab90e}</UniqueIdentifier> + </Filter> </ItemGroup> <ItemGroup> <ClCompile Include="renderer_opengl\renderer_opengl.cpp"> @@ -16,11 +19,11 @@ <ClCompile Include="utils.cpp" /> <ClCompile Include="vertex_shader.cpp" /> <ClCompile Include="video_core.cpp" /> + <ClCompile Include="debug_utils\debug_utils.cpp"> + <Filter>debug_utils</Filter> + </ClCompile> </ItemGroup> <ItemGroup> - <ClInclude Include="renderer_opengl\renderer_opengl.h"> - <Filter>renderer_opengl</Filter> - </ClInclude> <ClInclude Include="clipper.h" /> <ClInclude Include="command_processor.h" /> <ClInclude Include="gpu_debugger.h" /> @@ -32,8 +35,12 @@ <ClInclude Include="utils.h" /> <ClInclude Include="vertex_shader.h" /> <ClInclude Include="video_core.h" /> + <ClInclude Include="renderer_opengl\renderer_opengl.h" /> + <ClInclude Include="debug_utils\debug_utils.h"> + <Filter>debug_utils</Filter> + </ClInclude> </ItemGroup> <ItemGroup> <Text Include="CMakeLists.txt" /> </ItemGroup> -</Project> +</Project>
\ No newline at end of file |