summaryrefslogtreecommitdiffstats
path: root/src/core/hw/gpu.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/hw/gpu.cpp')
-rw-r--r--src/core/hw/gpu.cpp151
1 files changed, 86 insertions, 65 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index a4dfb7e43..0e6b91e3a 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -5,34 +5,26 @@
#include <cstring>
#include <numeric>
#include <type_traits>
-
#include "common/color.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/vector_math.h"
-
-#include "core/settings.h"
-#include "core/memory.h"
#include "core/core_timing.h"
-
#include "core/hle/service/gsp_gpu.h"
#include "core/hle/service/hid/hid.h"
-
-#include "core/hw/hw.h"
#include "core/hw/gpu.h"
-
+#include "core/hw/hw.h"
+#include "core/memory.h"
+#include "core/settings.h"
#include "core/tracer/recorder.h"
-
#include "video_core/command_processor.h"
+#include "video_core/debug_utils/debug_utils.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
-#include "video_core/debug_utils/debug_utils.h"
-
-
namespace GPU {
Regs g_regs;
@@ -49,7 +41,7 @@ static u64 frame_count;
static bool last_skip_frame;
template <typename T>
-inline void Read(T &var, const u32 raw_addr) {
+inline void Read(T& var, const u32 raw_addr) {
u32 addr = raw_addr - HW::VADDR_GPU;
u32 index = addr / 4;
@@ -105,8 +97,7 @@ inline void Write(u32 addr, const T data) {
// Memory fills are triggered once the fill value is written.
case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3):
- case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3):
- {
+ case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): {
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
auto& config = g_regs.memory_fill_config[is_second_filler];
@@ -125,7 +116,9 @@ inline void Write(u32 addr, const T data) {
// regions that were between surfaces or within the touching
// ones for cpu to manually fill here.
if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) {
- Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
+ Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
+ config.GetEndAddress() -
+ config.GetStartAddress());
if (config.fill_24bit) {
// fill with 24-bit values
@@ -150,7 +143,8 @@ inline void Write(u32 addr, const T data) {
}
}
- LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
+ LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(),
+ config.GetEndAddress());
if (!is_second_filler) {
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
@@ -167,15 +161,15 @@ inline void Write(u32 addr, const T data) {
break;
}
- case GPU_REG_INDEX(display_transfer_config.trigger):
- {
+ case GPU_REG_INDEX(display_transfer_config.trigger): {
MICROPROFILE_SCOPE(GPU_DisplayTransfer);
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
if (Pica::g_debug_context)
- Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
+ Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer,
+ nullptr);
if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) {
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
@@ -187,17 +181,23 @@ inline void Write(u32 addr, const T data) {
u32 output_width = config.texture_copy.output_width * 16;
u32 output_gap = config.texture_copy.output_gap * 16;
- size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
- Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size));
+ size_t contiguous_input_size =
+ config.texture_copy.size / input_width * (input_width + input_gap);
+ Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(),
+ static_cast<u32>(contiguous_input_size));
- size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
- Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
+ size_t contiguous_output_size =
+ config.texture_copy.size / output_width * (output_width + output_gap);
+ Memory::RasterizerFlushAndInvalidateRegion(
+ config.GetPhysicalOutputAddress(),
+ static_cast<u32>(contiguous_output_size));
u32 remaining_size = config.texture_copy.size;
u32 remaining_input = input_width;
u32 remaining_output = output_width;
while (remaining_size > 0) {
- u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
+ u32 copy_size =
+ std::min({remaining_input, remaining_output, remaining_size});
std::memcpy(dst_pointer, src_pointer, copy_size);
src_pointer += copy_size;
@@ -217,10 +217,11 @@ inline void Write(u32 addr, const T data) {
}
}
- LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
- config.texture_copy.size,
- config.GetPhysicalInputAddress(), input_width, input_gap,
- config.GetPhysicalOutputAddress(), output_width, output_gap,
+ LOG_TRACE(
+ HW_GPU,
+ "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
+ config.texture_copy.size, config.GetPhysicalInputAddress(), input_width,
+ input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap,
config.flags);
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
@@ -228,7 +229,8 @@ inline void Write(u32 addr, const T data) {
}
if (config.scaling > config.ScaleXY) {
- LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
+ LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u",
+ config.scaling.Value());
UNIMPLEMENTED();
break;
}
@@ -245,11 +247,14 @@ inline void Write(u32 addr, const T data) {
u32 output_width = config.output_width >> horizontal_scale;
u32 output_height = config.output_height >> vertical_scale;
- u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
- u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
+ u32 input_size = config.input_width * config.input_height *
+ GPU::Regs::BytesPerPixel(config.input_format);
+ u32 output_size =
+ output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
- Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
+ Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
+ output_size);
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
@@ -278,11 +283,14 @@ inline void Write(u32 addr, const T data) {
u32 coarse_y = y & ~7;
u32 stride = output_width * dst_bytes_per_pixel;
- src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
- dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
+ src_offset =
+ (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+ dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
+ coarse_y * stride;
} else {
// Both input and output are linear
- src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+ src_offset =
+ (input_x + input_y * config.input_width) * src_bytes_per_pixel;
dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
}
} else {
@@ -291,7 +299,9 @@ inline void Write(u32 addr, const T data) {
u32 coarse_y = input_y & ~7;
u32 stride = config.input_width * src_bytes_per_pixel;
- src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
+ src_offset = VideoCore::GetMortonOffset(input_x, input_y,
+ src_bytes_per_pixel) +
+ coarse_y * stride;
dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
} else {
// Both input and output are tiled
@@ -301,20 +311,27 @@ inline void Write(u32 addr, const T data) {
u32 in_coarse_y = input_y & ~7;
u32 in_stride = config.input_width * src_bytes_per_pixel;
- src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
- dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
+ src_offset = VideoCore::GetMortonOffset(input_x, input_y,
+ src_bytes_per_pixel) +
+ in_coarse_y * in_stride;
+ dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) +
+ out_coarse_y * out_stride;
}
}
const u8* src_pixel = src_pointer + src_offset;
src_color = DecodePixel(config.input_format, src_pixel);
if (config.scaling == config.ScaleX) {
- Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
+ Math::Vec4<u8> pixel =
+ DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
src_color = ((src_color + pixel) / 2).Cast<u8>();
} else if (config.scaling == config.ScaleXY) {
- Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
- Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
- Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
+ Math::Vec4<u8> pixel1 = DecodePixel(
+ config.input_format, src_pixel + 1 * src_bytes_per_pixel);
+ Math::Vec4<u8> pixel2 = DecodePixel(
+ config.input_format, src_pixel + 2 * src_bytes_per_pixel);
+ Math::Vec4<u8> pixel3 = DecodePixel(
+ config.input_format, src_pixel + 3 * src_bytes_per_pixel);
src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
}
@@ -341,17 +358,20 @@ inline void Write(u32 addr, const T data) {
break;
default:
- LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value());
+ LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x",
+ config.output_format.Value());
break;
}
}
}
- LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X",
- config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format),
- config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
- config.GetPhysicalOutputAddress(), output_width, output_height,
- config.output_format.Value(), config.flags);
+ LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> "
+ "0x%08x(%ux%u), dst format %x, flags 0x%08X",
+ config.output_height * output_width *
+ GPU::Regs::BytesPerPixel(config.output_format),
+ config.GetPhysicalInputAddress(), config.input_width.Value(),
+ config.input_height.Value(), config.GetPhysicalOutputAddress(),
+ output_width, output_height, config.output_format.Value(), config.flags);
}
g_regs.display_transfer_config.trigger = 0;
@@ -361,17 +381,16 @@ inline void Write(u32 addr, const T data) {
}
// Seems like writing to this register triggers processing
- case GPU_REG_INDEX(command_processor_config.trigger):
- {
+ case GPU_REG_INDEX(command_processor_config.trigger): {
const auto& config = g_regs.command_processor_config;
- if (config.trigger & 1)
- {
+ if (config.trigger & 1) {
MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
- Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
+ Pica::g_debug_context->recorder->MemoryAccessed(
+ (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
}
Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
@@ -389,16 +408,17 @@ inline void Write(u32 addr, const T data) {
// This is happening *after* handling the write to make sure we properly catch all memory reads.
if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
// addr + GPU VBase - IO VBase + IO PBase
- Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
+ Pica::g_debug_context->recorder->RegisterWritten<T>(
+ addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
}
}
// Explicitly instantiate template functions because we aren't defining this in the header:
-template void Read<u64>(u64 &var, const u32 addr);
-template void Read<u32>(u32 &var, const u32 addr);
-template void Read<u16>(u16 &var, const u32 addr);
-template void Read<u8>(u8 &var, const u32 addr);
+template void Read<u64>(u64& var, const u32 addr);
+template void Read<u32>(u32& var, const u32 addr);
+template void Read<u16>(u16& var, const u32 addr);
+template void Read<u8>(u8& var, const u32 addr);
template void Write<u64>(u32 addr, const u64 data);
template void Write<u32>(u32 addr, const u32 data);
@@ -417,8 +437,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
// - If frameskip == 0 (disabled), always swap buffers
// - If frameskip == 1, swap buffers every other frame (starting from the first frame)
// - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame)
- if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) ||
- Settings::values.frame_skip == 0) {
+ if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) &&
+ last_skip_frame != g_skip_frame) ||
+ Settings::values.frame_skip == 0) {
VideoCore::g_renderer->SwapBuffers();
}
@@ -448,12 +469,12 @@ void Init() {
// .. or at least these are the ones used by system applets.
// There's probably a smarter way to come up with addresses
// like this which does not require hardcoding.
- framebuffer_top.address_left1 = 0x181E6000;
- framebuffer_top.address_left2 = 0x1822C800;
+ framebuffer_top.address_left1 = 0x181E6000;
+ framebuffer_top.address_left2 = 0x1822C800;
framebuffer_top.address_right1 = 0x18273000;
framebuffer_top.address_right2 = 0x182B9800;
- framebuffer_sub.address_left1 = 0x1848F000;
- framebuffer_sub.address_left2 = 0x184C7800;
+ framebuffer_sub.address_left1 = 0x1848F000;
+ framebuffer_sub.address_left2 = 0x184C7800;
framebuffer_top.width.Assign(240);
framebuffer_top.height.Assign(400);