summaryrefslogtreecommitdiffstats
path: root/src/core/hw
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/hw')
-rw-r--r--src/core/hw/gpu.cpp69
-rw-r--r--src/core/hw/gpu.h32
2 files changed, 76 insertions, 25 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 3ccbc03b2..68ae38289 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cstring>
+#include <numeric>
#include <type_traits>
#include "common/color.h"
@@ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) {
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
+ if (config.is_texture_copy) {
+ u32 input_width = config.texture_copy.input_width * 16;
+ u32 input_gap = config.texture_copy.input_gap * 16;
+ u32 output_width = config.texture_copy.output_width * 16;
+ u32 output_gap = config.texture_copy.output_gap * 16;
+
+ size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
+ VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size);
+
+ u32 remaining_size = config.texture_copy.size;
+ u32 remaining_input = input_width;
+ u32 remaining_output = output_width;
+ while (remaining_size > 0) {
+ u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
+
+ std::memcpy(dst_pointer, src_pointer, copy_size);
+ src_pointer += copy_size;
+ dst_pointer += copy_size;
+
+ remaining_input -= copy_size;
+ remaining_output -= copy_size;
+ remaining_size -= copy_size;
+
+ if (remaining_input == 0) {
+ remaining_input = input_width;
+ src_pointer += input_gap;
+ }
+ if (remaining_output == 0) {
+ remaining_output = output_width;
+ dst_pointer += output_gap;
+ }
+ }
+
+ LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
+ config.texture_copy.size,
+ config.GetPhysicalInputAddress(), input_width, input_gap,
+ config.GetPhysicalOutputAddress(), output_width, output_gap,
+ config.flags);
+
+ size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
+ VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size);
+
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
+ break;
+ }
+
if (config.scaling > config.ScaleXY) {
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
UNIMPLEMENTED();
break;
}
- if (config.output_tiled &&
- (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
+ if (config.input_linear && config.scaling != config.NoScale) {
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
UNIMPLEMENTED();
break;
@@ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) {
VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size);
- if (config.raw_copy) {
- // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
- // TODO(Subv): Verify if raw copies perform scaling
- memcpy(dst_pointer, src_pointer, output_size);
-
- LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy",
- output_size,
- config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
- config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(),
- config.output_format.Value(), config.flags);
-
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
-
- VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
- break;
- }
-
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
Math::Vec4<u8> src_color;
@@ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) {
u32 src_offset;
u32 dst_offset;
- if (config.output_tiled) {
+ if (config.input_linear) {
if (!config.dont_swizzle) {
// Interpret the input as linear and the output as tiled
u32 coarse_y = y & ~7;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index daad506fe..2e3a9f779 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -201,12 +201,14 @@ struct Regs {
u32 flags;
BitField< 0, 1, u32> flip_vertically; // flips input data vertically
- BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format
- BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing
+ BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format
+ BitField< 2, 1, u32> crop_input_lines;
+ BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields
BitField< 5, 1, u32> dont_swizzle;
BitField< 8, 3, PixelFormat> input_format;
BitField<12, 3, PixelFormat> output_format;
-
+ /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
+ BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
};
@@ -214,10 +216,30 @@ struct Regs {
// it seems that writing to this field triggers the display transfer
u32 trigger;
+
+ INSERT_PADDING_WORDS(0x1);
+
+ struct {
+ u32 size;
+
+ union {
+ u32 input_size;
+
+ BitField< 0, 16, u32> input_width;
+ BitField<16, 16, u32> input_gap;
+ };
+
+ union {
+ u32 output_size;
+
+ BitField< 0, 16, u32> output_width;
+ BitField<16, 16, u32> output_gap;
+ };
+ } texture_copy;
} display_transfer_config;
- ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
+ ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c);
- INSERT_PADDING_WORDS(0x331);
+ INSERT_PADDING_WORDS(0x32D);
struct {
// command list size (in bytes)