diff options
-rw-r--r-- | src/citra_qt/debugger/graphics_cmdlists.cpp | 2 | ||||
-rw-r--r-- | src/common/common.vcxproj | 1 | ||||
-rw-r--r-- | src/common/common.vcxproj.filters | 1 | ||||
-rw-r--r-- | src/common/register_set.h | 163 | ||||
-rw-r--r-- | src/core/hle/service/gsp.cpp | 36 | ||||
-rw-r--r-- | src/core/hw/gpu.cpp | 47 | ||||
-rw-r--r-- | src/core/hw/gpu.h | 297 | ||||
-rw-r--r-- | src/video_core/pica.h | 242 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 4 |
9 files changed, 377 insertions, 416 deletions
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 195197ef5..30b8b5dae 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -83,7 +83,7 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const if (role == Qt::DisplayRole) { QString content; if (index.column() == 0) { - content = Pica::command_names[header.cmd_id]; + content = QString::fromLatin1(Pica::Regs::GetCommandName(header.cmd_id).c_str()); content.append(" "); } else if (index.column() == 1) { for (int j = 0; j < cmd.size(); ++j) diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj index 1f5c714c3..341d3a813 100644 --- a/src/common/common.vcxproj +++ b/src/common/common.vcxproj @@ -182,7 +182,6 @@ <ClInclude Include="mem_arena.h" /> <ClInclude Include="msg_handler.h" /> <ClInclude Include="platform.h" /> - <ClInclude Include="register_set.h" /> <ClInclude Include="scm_rev.h" /> <ClInclude Include="std_condition_variable.h" /> <ClInclude Include="std_mutex.h" /> diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters index e8c4ce360..59268ce5a 100644 --- a/src/common/common.vcxproj.filters +++ b/src/common/common.vcxproj.filters @@ -29,7 +29,6 @@ <ClInclude Include="memory_util.h" /> <ClInclude Include="msg_handler.h" /> <ClInclude Include="platform.h" /> - <ClInclude Include="register_set.h" /> <ClInclude Include="std_condition_variable.h" /> <ClInclude Include="std_mutex.h" /> <ClInclude Include="std_thread.h" /> diff --git a/src/common/register_set.h b/src/common/register_set.h deleted file mode 100644 index ba19a2614..000000000 --- a/src/common/register_set.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#pragma once - -// Copyright 2014 Tony Wasserka -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of the owner nor the names of its contributors may -// be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -/* - * Standardized way to define a group of registers and corresponding data structures. To define - * a new register set, first define struct containing an enumeration called "Id" containing - * all register IDs and a template struct called "Struct". Specialize the Struct struct for any - * register ID which needs to be accessed in a specialized way. You can then declare the object - * containing all register values using the RegisterSet<BaseType, DefiningStruct> type, where - * BaseType is the underlying type of each register (e.g. u32). - * Of course, you'll usually want to implement the Struct template such that they are of the same - * size as BaseType. However, it's also possible to make it larger, e.g. when you want to describe - * multiple registers with the same structure. - * - * Example: - * - * struct Regs { - * enum Id : u32 { - * Value1 = 0, - * Value2 = 1, - * Value3 = 2, - * NumIds = 3 - * }; - * - * // declare register definition structures - * template<Id id> - * struct Struct; - * }; - * - * // Define register set object - * RegisterSet<u32, CommandIds> registers; - * - * // define register definition structures - * template<> - * struct Regs::Struct<Regs::Value1> { - * union { - * BitField<0, 4, u32> some_field; - * BitField<4, 3, u32> some_other_field; - * }; - * }; - * - * Usage in external code (within SomeNamespace scope): - * - * For a register which maps to a single index: - * registers.Get<Regs::Value1>().some_field = some_value; - * - * For a register which maps to different indices, e.g. a group of similar registers - * registers.Get<Regs::Value1>(index).some_field = some_value; - * - * - * @tparam BaseType Base type used for storing individual registers, e.g. u32 - * @tparam RegDefinition Class defining an enumeration called "Id" and a template<Id id> struct, as described above. - * @note RegDefinition::Id needs to have an enum value called NumIds defining the number of registers to be allocated. - */ -template<typename BaseType, typename RegDefinition> -struct RegisterSet { - // Register IDs - using Id = typename RegDefinition::Id; - - // type used for *this - using ThisType = RegisterSet<BaseType, RegDefinition>; - - // Register definition structs, defined in RegDefinition - template<Id id> - using Struct = typename RegDefinition::template Struct<id>; - - - /* - * Lookup register with the given id and return it as the corresponding structure type. - * @note This just forwards the arguments to Get(Id). - */ - template<Id id> - const Struct<id>& Get() const { - return Get<id>(id); - } - - /* - * Lookup register with the given id and return it as the corresponding structure type. - * @note This just forwards the arguments to Get(Id). - */ - template<Id id> - Struct<id>& Get() { - return Get<id>(id); - } - - /* - * Lookup register with the given index and return it as the corresponding structure type. - * @todo Is this portable with regards to structures larger than BaseType? - * @note if index==id, you don't need to specify the function parameter. - */ - template<Id id> - const Struct<id>& Get(const Id& index) const { - const int idx = static_cast<size_t>(index); - return *reinterpret_cast<const Struct<id>*>(&raw[idx]); - } - - /* - * Lookup register with the given index and return it as the corresponding structure type. - * @note This just forwards the arguments to the const version of Get(Id). - * @note if index==id, you don't need to specify the function parameter. - */ - template<Id id> - Struct<id>& Get(const Id& index) { - return const_cast<Struct<id>&>(GetThis().Get<id>(index)); - } - - /* - * Plain array access. - * @note If you want to have this casted to a register defininition struct, use Get() instead. - */ - const BaseType& operator[] (const Id& id) const { - return raw[static_cast<size_t>(id)]; - } - - /* - * Plain array access. - * @note If you want to have this casted to a register defininition struct, use Get() instead. - * @note This operator just forwards its argument to the const version. - */ - BaseType& operator[] (const Id& id) { - return const_cast<BaseType&>(GetThis()[id]); - } - -private: - /* - * Returns a const reference to "this". - */ - const ThisType& GetThis() const { - return static_cast<const ThisType&>(*this); - } - - BaseType raw[Id::NumIds]; -}; diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp index e241b31c8..08e65612e 100644 --- a/src/core/hle/service/gsp.cpp +++ b/src/core/hle/service/gsp.cpp @@ -173,11 +173,11 @@ void ExecuteCommand(const Command& command) { case CommandId::SET_COMMAND_LIST_LAST: { auto& params = command.set_command_list_last; - WriteGPURegister(GPU::Regs::CommandProcessor + 2, params.address >> 3); - WriteGPURegister(GPU::Regs::CommandProcessor, params.size >> 3); + WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), params.address >> 3); + WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3); // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though - WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); + WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1); // TODO: Move this to GPU // TODO: Not sure what units the size is measured in @@ -193,15 +193,15 @@ void ExecuteCommand(const Command& command) { case CommandId::SET_MEMORY_FILL: { auto& params = command.memory_fill; - WriteGPURegister(GPU::Regs::MemoryFill, params.start1 >> 3); - WriteGPURegister(GPU::Regs::MemoryFill + 1, params.end1 >> 3); - WriteGPURegister(GPU::Regs::MemoryFill + 2, params.end1 - params.start1); - WriteGPURegister(GPU::Regs::MemoryFill + 3, params.value1); - - WriteGPURegister(GPU::Regs::MemoryFill + 4, params.start2 >> 3); - WriteGPURegister(GPU::Regs::MemoryFill + 5, params.end2 >> 3); - WriteGPURegister(GPU::Regs::MemoryFill + 6, params.end2 - params.start2); - WriteGPURegister(GPU::Regs::MemoryFill + 7, params.value2); + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), params.start1 >> 3); + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), params.end1 >> 3); + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1); + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1); + + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), params.start2 >> 3); + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), params.end2 >> 3); + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2); + WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2); break; } @@ -220,15 +220,15 @@ void ExecuteCommand(const Command& command) { case CommandId::SET_TEXTURE_COPY: { auto& params = command.image_copy; - WriteGPURegister(GPU::Regs::DisplayTransfer, params.in_buffer_address >> 3); - WriteGPURegister(GPU::Regs::DisplayTransfer + 1, params.out_buffer_address >> 3); - WriteGPURegister(GPU::Regs::DisplayTransfer + 3, params.in_buffer_size); - WriteGPURegister(GPU::Regs::DisplayTransfer + 2, params.out_buffer_size); - WriteGPURegister(GPU::Regs::DisplayTransfer + 4, params.flags); + WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), params.in_buffer_address >> 3); + WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), params.out_buffer_address >> 3); + WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size); + WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size); + WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags); // TODO: Should this only be ORed with 1 for texture copies? // trigger transfer - WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1); + WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1); break; } diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index d94c2329b..fd40f8ac0 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -19,7 +19,7 @@ namespace GPU { -RegisterSet<u32, Regs> g_regs; +Regs g_regs; u32 g_cur_line = 0; ///< Current vertical screen line u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line @@ -32,8 +32,8 @@ void SetFramebufferLocation(const FramebufferLocation mode) { switch (mode) { case FRAMEBUFFER_LOCATION_FCRAM: { - auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); - auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); + auto& framebuffer_top = g_regs.framebuffer_config[0]; + auto& framebuffer_sub = g_regs.framebuffer_config[1]; framebuffer_top.address_left1 = PADDR_TOP_LEFT_FRAME1; framebuffer_top.address_left2 = PADDR_TOP_LEFT_FRAME2; @@ -48,8 +48,8 @@ void SetFramebufferLocation(const FramebufferLocation mode) { case FRAMEBUFFER_LOCATION_VRAM: { - auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); - auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); + auto& framebuffer_top = g_regs.framebuffer_config[0]; + auto& framebuffer_sub = g_regs.framebuffer_config[1]; framebuffer_top.address_left1 = PADDR_VRAM_TOP_LEFT_FRAME1; framebuffer_top.address_left2 = PADDR_VRAM_TOP_LEFT_FRAME2; @@ -107,13 +107,12 @@ inline void Read(T &var, const u32 raw_addr) { int index = addr / 4; // Reads other than u32 are untested, so I'd rather have them abort than silently fail - if (index >= Regs::NumIds || !std::is_same<T,u32>::value) - { + if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) { ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); return; } - var = g_regs[static_cast<Regs::Id>(addr / 4)]; + var = g_regs[addr / 4]; } template <typename T> @@ -122,22 +121,22 @@ inline void Write(u32 addr, const T data) { int index = addr / 4; // Writes other than u32 are untested, so I'd rather have them abort than silently fail - if (index >= Regs::NumIds || !std::is_same<T,u32>::value) - { + if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) { ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); return; } - g_regs[static_cast<Regs::Id>(index)] = data; + g_regs[index] = data; - switch (static_cast<Regs::Id>(index)) { + switch (index) { // Memory fills are triggered once the fill value is written. // NOTE: This is not verified. - case Regs::MemoryFill + 3: - case Regs::MemoryFill + 7: + case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3): + case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3): { - const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3)); + const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value)); + const auto& config = g_regs.memory_fill_config[is_second_filler]; // TODO: Not sure if this check should be done at GSP level instead if (config.address_start) { @@ -152,9 +151,9 @@ inline void Write(u32 addr, const T data) { break; } - case Regs::DisplayTransfer + 6: + case GPU_REG_INDEX(display_transfer_config.trigger): { - const auto& config = g_regs.Get<Regs::DisplayTransfer>(); + const auto& config = g_regs.display_transfer_config; if (config.trigger & 1) { u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress()); u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress()); @@ -221,13 +220,13 @@ inline void Write(u32 addr, const T data) { break; } - case Regs::CommandProcessor + 4: + case GPU_REG_INDEX(command_processor_config.trigger): { - const auto& config = g_regs.Get<Regs::CommandProcessor>(); + const auto& config = g_regs.command_processor_config; if (config.trigger & 1) { - // u32* buffer = (u32*)Memory::GetPointer(config.address << 3); - ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3); + // u32* buffer = (u32*)Memory::GetPointer(config.GetPhysicalAddress()); + ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.GetPhysicalAddress()); // TODO: Process command list! } break; @@ -252,7 +251,7 @@ template void Write<u8>(u32 addr, const u8 data); /// Update hardware void Update() { - auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); + auto& framebuffer_top = g_regs.framebuffer_config[0]; u64 current_ticks = Core::g_app_core->GetTicks(); // Synchronize line... @@ -280,8 +279,8 @@ void Init() { // SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); - auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); - auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); + auto& framebuffer_top = g_regs.framebuffer_config[0]; + auto& framebuffer_sub = g_regs.framebuffer_config[1]; // TODO: Width should be 240 instead? framebuffer_top.width = 480; framebuffer_top.height = 400; diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 42f18a0e7..3065da891 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -4,32 +4,57 @@ #pragma once +#include <cstddef> + #include "common/common_types.h" #include "common/bit_field.h" -#include "common/register_set.h" namespace GPU { static const u32 kFrameCycles = 268123480 / 60; ///< 268MHz / 60 frames per second static const u32 kFrameTicks = kFrameCycles / 3; ///< Approximate number of instructions/frame +// Returns index corresponding to the Regs member labeled by field_name +// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions +// when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). +// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members +// Hopefully, this will be fixed sometime in the future. +// For lack of better alternatives, we currently hardcode the offsets when constant +// expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts +// will then make sure the offsets indeed match the automatically calculated ones). +#define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32)) +#if defined(_MSC_VER) +#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index) +#else +// NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler +// really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX +// and then performs a (no-op) cast to size_t iff the second argument matches the expected +// field offset. Otherwise, the compiler will fail to compile this code. +#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ + ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name)) +#endif + // MMIO region 0x1EFxxxxx struct Regs { - enum Id : u32 { - MemoryFill = 0x00004, // + 5,6,7; second block at 8-11 - - FramebufferTop = 0x00117, // + 11a,11b,11c,11d(?),11e...126 - FramebufferBottom = 0x00157, // + 15a,15b,15c,15d(?),15e...166 - - DisplayTransfer = 0x00300, // + 301,302,303,304,305,306 - - CommandProcessor = 0x00638, // + 63a,63c - NumIds = 0x01000 - }; - - template<Id id> - struct Struct; +// helper macro to properly align structure members. +// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121", +// depending on the current source line to make sure variable names are unique. +#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y +#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y) +#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]; + +// helper macro to make sure the defined structures are of the expected size. +#if defined(_MSC_VER) +// TODO: MSVC does not support using sizeof() on non-static data members even though this +// is technically allowed since C++11. This macro should be enabled once MSVC adds +// support for that. +#define ASSERT_MEMBER_SIZE(name, size_in_bytes) +#else +#define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ + static_assert(sizeof(name) == size_in_bytes, \ + "Structure size and register block length don't match"); +#endif enum class FramebufferFormat : u32 { RGBA8 = 0, @@ -38,135 +63,191 @@ struct Regs { RGB5A1 = 3, RGBA4 = 4, }; -}; -template<> -struct Regs::Struct<Regs::MemoryFill> { - u32 address_start; - u32 address_end; // ? - u32 size; - u32 value; // ? + INSERT_PADDING_WORDS(0x4); - inline u32 GetStartAddress() const { - return address_start * 8; - } + struct { + u32 address_start; + u32 address_end; // ? + u32 size; + u32 value; // ? - inline u32 GetEndAddress() const { - return address_end * 8; - } -}; -static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match"); + inline u32 GetStartAddress() const { + return DecodeAddressRegister(address_start); + } -template<> -struct Regs::Struct<Regs::FramebufferTop> { - using Format = Regs::FramebufferFormat; + inline u32 GetEndAddress() const { + return DecodeAddressRegister(address_end); + } + } memory_fill_config[2]; + ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10); - union { - u32 size; + INSERT_PADDING_WORDS(0x10b); - BitField< 0, 16, u32> width; - BitField<16, 16, u32> height; - }; + struct { + using Format = Regs::FramebufferFormat; - u32 pad0[2]; + union { + u32 size; - u32 address_left1; - u32 address_left2; + BitField< 0, 16, u32> width; + BitField<16, 16, u32> height; + }; - union { - u32 format; + INSERT_PADDING_WORDS(0x2); - BitField< 0, 3, Format> color_format; - }; + u32 address_left1; + u32 address_left2; - u32 pad1; + union { + u32 format; - union { - u32 active_fb; + BitField< 0, 3, Format> color_format; + }; - // 0: Use parameters ending with "1" - // 1: Use parameters ending with "2" - BitField<0, 1, u32> second_fb_active; - }; + INSERT_PADDING_WORDS(0x1); - u32 pad2[5]; + union { + u32 active_fb; - // Distance between two pixel rows, in bytes - u32 stride; + // 0: Use parameters ending with "1" + // 1: Use parameters ending with "2" + BitField<0, 1, u32> second_fb_active; + }; - u32 address_right1; - u32 address_right2; -}; + INSERT_PADDING_WORDS(0x5); -template<> -struct Regs::Struct<Regs::FramebufferBottom> : public Regs::Struct<Regs::FramebufferTop> { -}; -static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match"); + // Distance between two pixel rows, in bytes + u32 stride; -template<> -struct Regs::Struct<Regs::DisplayTransfer> { - using Format = Regs::FramebufferFormat; + u32 address_right1; + u32 address_right2; - u32 input_address; - u32 output_address; + INSERT_PADDING_WORDS(0x30); + } framebuffer_config[2]; + ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100); - inline u32 GetPhysicalInputAddress() const { - return input_address * 8; - } + INSERT_PADDING_WORDS(0x169); - inline u32 GetPhysicalOutputAddress() const { - return output_address * 8; - } + struct { + using Format = Regs::FramebufferFormat; - union { - u32 output_size; + u32 input_address; + u32 output_address; - BitField< 0, 16, u32> output_width; - BitField<16, 16, u32> output_height; - }; + inline u32 GetPhysicalInputAddress() const { + return DecodeAddressRegister(input_address); + } - union { - u32 input_size; + inline u32 GetPhysicalOutputAddress() const { + return DecodeAddressRegister(output_address); + } - BitField< 0, 16, u32> input_width; - BitField<16, 16, u32> input_height; - }; + union { + u32 output_size; - union { - u32 flags; + BitField< 0, 16, u32> output_width; + BitField<16, 16, u32> output_height; + }; - BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true - BitField< 8, 3, Format> input_format; - BitField<12, 3, Format> output_format; - BitField<16, 1, u32> output_tiled; // stores output in a tiled format - }; + union { + u32 input_size; - u32 unknown; + BitField< 0, 16, u32> input_width; + BitField<16, 16, u32> input_height; + }; - // it seems that writing to this field triggers the display transfer - u32 trigger; -}; -static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match"); + union { + u32 flags; -template<> -struct Regs::Struct<Regs::CommandProcessor> { - // command list size - u32 size; + BitField< 0, 1, u32> flip_data; // flips input data horizontally (TODO) if true + BitField< 8, 3, Format> input_format; + BitField<12, 3, Format> output_format; + BitField<16, 1, u32> output_tiled; // stores output in a tiled format + }; - u32 pad0; + INSERT_PADDING_WORDS(0x1); - // command list address - u32 address; + // it seems that writing to this field triggers the display transfer + u32 trigger; + } display_transfer_config; + ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); - u32 pad1; + INSERT_PADDING_WORDS(0x331); - // it seems that writing to this field triggers command list processing - u32 trigger; -}; -static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match"); + struct { + // command list size + u32 size; + + INSERT_PADDING_WORDS(0x1); + + // command list address + u32 address; + + INSERT_PADDING_WORDS(0x1); + + // it seems that writing to this field triggers command list processing + u32 trigger; + inline u32 GetPhysicalAddress() const { + return DecodeAddressRegister(address); + } + } command_processor_config; + ASSERT_MEMBER_SIZE(command_processor_config, 0x14); -extern RegisterSet<u32, Regs> g_regs; + INSERT_PADDING_WORDS(0x9c3); + +#undef INSERT_PADDING_WORDS_HELPER1 +#undef INSERT_PADDING_WORDS_HELPER2 +#undef INSERT_PADDING_WORDS + + static inline int NumIds() { + return sizeof(Regs) / sizeof(u32); + } + + u32& operator [] (int index) const { + u32* content = (u32*)this; + return content[index]; + } + + u32& operator [] (int index) { + u32* content = (u32*)this; + return content[index]; + } + +private: + /* + * Most physical addresses which GPU registers refer to are 8-byte aligned. + * This function should be used to get the address from a raw register value. + */ + static inline u32 DecodeAddressRegister(u32 register_value) { + return register_value * 8; + } +}; +static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); + +// TODO: MSVC does not support using offsetof() on non-static data members even though this +// is technically allowed since C++11. This macro should be enabled once MSVC adds +// support for that. +#ifndef _MSC_VER +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(Regs, field_name) == position * 4, \ + "Field "#field_name" has invalid position") + +ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); +ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); +ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); +ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); +ASSERT_REG_POSITION(display_transfer_config, 0x00300); +ASSERT_REG_POSITION(command_processor_config, 0x00638); + +#undef ASSERT_REG_POSITION +#endif // !defined(_MSC_VER) + +// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. +static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); + +extern Regs g_regs; enum { TOP_ASPECT_X = 0x5, diff --git a/src/video_core/pica.h b/src/video_core/pica.h index f0fa3aba9..d64559d72 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -4,127 +4,173 @@ #pragma once +#include <cstddef> #include <initializer_list> #include <map> #include "common/bit_field.h" #include "common/common_types.h" -#include "common/register_set.h" namespace Pica { +// Returns index corresponding to the Regs member labeled by field_name +// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions +// when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])). +// For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members +// Hopefully, this will be fixed sometime in the future. +// For lack of better alternatives, we currently hardcode the offsets when constant +// expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, static_asserts +// will then make sure the offsets indeed match the automatically calculated ones). +#define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32)) +#if defined(_MSC_VER) +#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index) +#else +// NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler +// really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX +// and then performs a (no-op) cast to size_t iff the second argument matches the expected +// field offset. Otherwise, the compiler will fail to compile this code. +#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ + ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name)) +#endif // _MSC_VER + struct Regs { - enum Id : u32 { - ViewportSizeX = 0x41, - ViewportInvSizeX = 0x42, - ViewportSizeY = 0x43, - ViewportInvSizeY = 0x44, - ViewportCorner = 0x68, - DepthBufferFormat = 0x116, - ColorBufferFormat = 0x117, - DepthBufferAddress = 0x11C, - ColorBufferAddress = 0x11D, - ColorBufferSize = 0x11E, - - VertexArrayBaseAddr = 0x200, - VertexDescriptor = 0x201, // 0x202 - VertexAttributeOffset = 0x203, // 0x206,0x209,0x20C,0x20F,0x212,0x215,0x218,0x21B,0x21E,0x221,0x224 - VertexAttributeInfo0 = 0x204, // 0x207,0x20A,0x20D,0x210,0x213,0x216,0x219,0x21C,0x21F,0x222,0x225 - VertexAttributeInfo1 = 0x205, // 0x208,0x20B,0x20E,0x211,0x214,0x217,0x21A,0x21D,0x220,0x223,0x226 - - NumIds = 0x300, - }; - - template<Id id> - union Struct; + +// helper macro to properly align structure members. +// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121", +// depending on the current source line to make sure variable names are unique. +#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y +#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y) +#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]; + + INSERT_PADDING_WORDS(0x41); + + BitField<0, 24, u32> viewport_size_x; + INSERT_PADDING_WORDS(1); + BitField<0, 24, u32> viewport_size_y; + + INSERT_PADDING_WORDS(0x1bc); + + union { + enum class Format : u64 { + BYTE = 0, + UBYTE = 1, + SHORT = 2, + FLOAT = 3, + }; + + BitField< 0, 2, Format> format0; + BitField< 2, 2, u64> size0; // number of elements minus 1 + BitField< 4, 2, Format> format1; + BitField< 6, 2, u64> size1; + BitField< 8, 2, Format> format2; + BitField<10, 2, u64> size2; + BitField<12, 2, Format> format3; + BitField<14, 2, u64> size3; + BitField<16, 2, Format> format4; + BitField<18, 2, u64> size4; + BitField<20, 2, Format> format5; + BitField<22, 2, u64> size5; + BitField<24, 2, Format> format6; + BitField<26, 2, u64> size6; + BitField<28, 2, Format> format7; + BitField<30, 2, u64> size7; + BitField<32, 2, Format> format8; + BitField<34, 2, u64> size8; + BitField<36, 2, Format> format9; + BitField<38, 2, u64> size9; + BitField<40, 2, Format> format10; + BitField<42, 2, u64> size10; + BitField<44, 2, Format> format11; + BitField<46, 2, u64> size11; + + BitField<48, 12, u64> attribute_mask; + BitField<60, 4, u64> num_attributes; // number of total attributes minus 1 + } vertex_descriptor; + + INSERT_PADDING_WORDS(0xfe); + +#undef INSERT_PADDING_WORDS_HELPER1 +#undef INSERT_PADDING_WORDS_HELPER2 +#undef INSERT_PADDING_WORDS + + // Map register indices to names readable by humans + // Used for debugging purposes, so performance is not an issue here + static std::string GetCommandName(int index) { + std::map<u32, std::string> map; + Regs regs; + + // TODO: MSVC does not support using offsetof() on non-static data members even though this + // is technically allowed since C++11. Hence, this functionality is disabled until + // MSVC properly supports it. + #ifndef _MSC_VER + #define ADD_FIELD(name) \ + do { \ + map.insert({PICA_REG_INDEX(name), #name}); \ + for (u32 i = PICA_REG_INDEX(name) + 1; i < PICA_REG_INDEX(name) + sizeof(regs.name) / 4; ++i) \ + map.insert({i, #name + std::string("+") + std::to_string(i-PICA_REG_INDEX(name))}); \ + } while(false) + + ADD_FIELD(viewport_size_x); + ADD_FIELD(viewport_size_y); + ADD_FIELD(vertex_descriptor); + + #undef ADD_FIELD + #endif // _MSC_VER + + // Return empty string if no match is found + return map[index]; + } + + static inline int NumIds() { + return sizeof(Regs) / sizeof(u32); + } + + u32& operator [] (int index) const { + u32* content = (u32*)this; + return content[index]; + } + + u32& operator [] (int index) { + u32* content = (u32*)this; + return content[index]; + } + +private: + /* + * Most physical addresses which Pica registers refer to are 8-byte aligned. + * This function should be used to get the address from a raw register value. + */ + static inline u32 DecodeAddressRegister(u32 register_value) { + return register_value * 8; + } }; -static inline Regs::Id VertexAttributeOffset(int n) -{ - return static_cast<Regs::Id>(0x203 + 3*n); -} +// TODO: MSVC does not support using offsetof() on non-static data members even though this +// is technically allowed since C++11. This macro should be enabled once MSVC adds +// support for that. +#ifndef _MSC_VER +#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") + +ASSERT_REG_POSITION(viewport_size_x, 0x41); +ASSERT_REG_POSITION(viewport_size_y, 0x43); +ASSERT_REG_POSITION(vertex_descriptor, 0x200); -static inline Regs::Id VertexAttributeInfo0(int n) -{ - return static_cast<Regs::Id>(0x204 + 3*n); -} +#undef ASSERT_REG_POSITION +#endif // !defined(_MSC_VER) -static inline Regs::Id VertexAttributeInfo1(int n) -{ - return static_cast<Regs::Id>(0x205 + 3*n); -} +// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. +static_assert(sizeof(Regs) == 0x300 * sizeof(u32), "Invalid total size of register set"); union CommandHeader { CommandHeader(u32 h) : hex(h) {} u32 hex; - BitField< 0, 16, Regs::Id> cmd_id; + BitField< 0, 16, u32> cmd_id; BitField<16, 4, u32> parameter_mask; BitField<20, 11, u32> extra_data_length; BitField<31, 1, u32> group_commands; }; -static std::map<Regs::Id, const char*> command_names = { - {Regs::ViewportSizeX, "ViewportSizeX" }, - {Regs::ViewportInvSizeX, "ViewportInvSizeX" }, - {Regs::ViewportSizeY, "ViewportSizeY" }, - {Regs::ViewportInvSizeY, "ViewportInvSizeY" }, - {Regs::ViewportCorner, "ViewportCorner" }, - {Regs::DepthBufferFormat, "DepthBufferFormat" }, - {Regs::ColorBufferFormat, "ColorBufferFormat" }, - {Regs::DepthBufferAddress, "DepthBufferAddress" }, - {Regs::ColorBufferAddress, "ColorBufferAddress" }, - {Regs::ColorBufferSize, "ColorBufferSize" }, -}; - -template<> -union Regs::Struct<Regs::ViewportSizeX> { - BitField<0, 24, u32> value; -}; - -template<> -union Regs::Struct<Regs::ViewportSizeY> { - BitField<0, 24, u32> value; -}; - -template<> -union Regs::Struct<Regs::VertexDescriptor> { - enum class Format : u64 { - BYTE = 0, - UBYTE = 1, - SHORT = 2, - FLOAT = 3, - }; - - BitField< 0, 2, Format> format0; - BitField< 2, 2, u64> size0; // number of elements minus 1 - BitField< 4, 2, Format> format1; - BitField< 6, 2, u64> size1; - BitField< 8, 2, Format> format2; - BitField<10, 2, u64> size2; - BitField<12, 2, Format> format3; - BitField<14, 2, u64> size3; - BitField<16, 2, Format> format4; - BitField<18, 2, u64> size4; - BitField<20, 2, Format> format5; - BitField<22, 2, u64> size5; - BitField<24, 2, Format> format6; - BitField<26, 2, u64> size6; - BitField<28, 2, Format> format7; - BitField<30, 2, u64> size7; - BitField<32, 2, Format> format8; - BitField<34, 2, u64> size8; - BitField<36, 2, Format> format9; - BitField<38, 2, u64> size9; - BitField<40, 2, Format> format10; - BitField<42, 2, u64> size10; - BitField<44, 2, Format> format11; - BitField<46, 2, u64> size11; - - BitField<48, 12, u64> attribute_mask; - BitField<60, 4, u64> num_attributes; // number of total attributes minus 1 -}; - } // namespace diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d0a8ec1da..02b174562 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -78,8 +78,8 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { */ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { - const auto& framebuffer_top = GPU::g_regs.Get<GPU::Regs::FramebufferTop>(); - const auto& framebuffer_sub = GPU::g_regs.Get<GPU::Regs::FramebufferBottom>(); + const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0]; + const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1]; const u32 active_fb_top = (framebuffer_top.active_fb == 1) ? framebuffer_top.address_left2 : framebuffer_top.address_left1; |