diff options
Diffstat (limited to '')
-rw-r--r-- | src/citra_qt/debugger/graphics.cpp | 2 | ||||
-rw-r--r-- | src/citra_qt/debugger/graphics_framebuffer.cpp | 12 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_interpreter.cpp | 281 | ||||
-rw-r--r-- | src/core/arm/interpreter/armemu.cpp | 78 | ||||
-rw-r--r-- | src/core/hw/gpu.cpp | 6 | ||||
-rw-r--r-- | src/core/hw/gpu.h | 3 | ||||
-rw-r--r-- | src/video_core/color.h | 32 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 28 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 48 | ||||
-rw-r--r-- | src/video_core/pica.h | 84 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 241 | ||||
-rw-r--r-- | src/video_core/utils.h | 26 | ||||
-rw-r--r-- | src/video_core/vertex_shader.cpp | 27 | ||||
-rw-r--r-- | src/video_core/vertex_shader.h | 1 |
14 files changed, 631 insertions, 238 deletions
diff --git a/src/citra_qt/debugger/graphics.cpp b/src/citra_qt/debugger/graphics.cpp index 6ff4c290d..9633d367e 100644 --- a/src/citra_qt/debugger/graphics.cpp +++ b/src/citra_qt/debugger/graphics.cpp @@ -72,7 +72,7 @@ void GPUCommandStreamItemModel::OnGXCommandFinishedInternal(int total_command_co GPUCommandStreamWidget::GPUCommandStreamWidget(QWidget* parent) : QDockWidget(tr("Graphics Debugger"), parent) { - // TODO: set objectName! + setObjectName("GraphicsDebugger"); GPUCommandStreamItemModel* command_model = new GPUCommandStreamItemModel(this); g_debugger.RegisterObserver(command_model); diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index dd41c3880..a9e9de652 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp @@ -10,6 +10,7 @@ #include <QPushButton> #include <QSpinBox> +#include "video_core/color.h" #include "video_core/pica.h" #include "graphics_framebuffer.hxx" @@ -202,7 +203,8 @@ void GraphicsFramebufferWidget::OnUpdate() framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); framebuffer_width = framebuffer.GetWidth(); framebuffer_height = framebuffer.GetHeight(); - framebuffer_format = static_cast<Format>(framebuffer.color_format); + // TODO: It's unknown how this format is actually specified + framebuffer_format = Format::RGBA8; break; } @@ -258,10 +260,10 @@ void GraphicsFramebufferWidget::OnUpdate() for (unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned x = 0; x < framebuffer_width; ++x) { u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); - u8 r = (value >> 11) & 0x1F; - u8 g = (value >> 6) & 0x1F; - u8 b = (value >> 1) & 0x1F; - u8 a = value & 1; + u8 r = Color::Convert5To8((value >> 11) & 0x1F); + u8 g = Color::Convert5To8((value >> 6) & 0x1F); + u8 b = Color::Convert5To8((value >> 1) & 0x1F); + u8 a = Color::Convert1To8(value & 1); decoded_image.setPixel(x, y, qRgba(r, g, b, 255/*a*/)); } diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index c61ae0053..f2ea0e9dd 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -2171,29 +2171,45 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(rsc)(unsigned int inst, int index) } return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(sadd8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SADD8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(sadd16)(unsigned int inst, int index) +ARM_INST_PTR INTERPRETER_TRANSLATE(sadd8)(unsigned int inst, int index) { arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; - + inst_base->cond = BITS(inst, 28, 31); inst_base->idx = index; inst_base->br = NON_BRANCH; inst_base->load_r15 = 0; - + inst_cream->Rm = BITS(inst, 0, 3); inst_cream->Rn = BITS(inst, 16, 19); inst_cream->Rd = BITS(inst, 12, 15); inst_cream->op1 = BITS(inst, 20, 21); inst_cream->op2 = BITS(inst, 5, 7); - + return inst_base; } +ARM_INST_PTR INTERPRETER_TRANSLATE(sadd16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(sadd8)(inst, index); +} ARM_INST_PTR INTERPRETER_TRANSLATE(saddsubx)(unsigned int inst, int index) { - return INTERPRETER_TRANSLATE(sadd16)(inst, index); + return INTERPRETER_TRANSLATE(sadd8)(inst, index); } +ARM_INST_PTR INTERPRETER_TRANSLATE(ssub8)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(sadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(ssub16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(sadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(ssubaddx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(sadd8)(inst, index); +} + ARM_INST_PTR INTERPRETER_TRANSLATE(sbc)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sbc_inst)); @@ -2236,13 +2252,48 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(sel)(unsigned int inst, int index) return inst_base; } + ARM_INST_PTR INTERPRETER_TRANSLATE(setend)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SETEND"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(shadd16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHADD16"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(shadd8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHADD8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(shaddsubx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHADDSUBX"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(shsub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHSUB16"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(shsub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHSUB8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(shsubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHSUBADDX"); } + +ARM_INST_PTR INTERPRETER_TRANSLATE(shadd8)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->op1 = BITS(inst, 20, 21); + inst_cream->op2 = BITS(inst, 5, 7); + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rn = BITS(inst, 16, 19); + inst_cream->Rd = BITS(inst, 12, 15); + + return inst_base; +} +ARM_INST_PTR INTERPRETER_TRANSLATE(shadd16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(shadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(shaddsubx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(shadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(shsub8)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(shadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(shsub16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(shadd8)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(shsubaddx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(shadd8)(inst, index); +} + ARM_INST_PTR INTERPRETER_TRANSLATE(smla)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(smla_inst)); @@ -2408,15 +2459,7 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(ssat16)(unsigned int inst, int index) return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(ssub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SSUB8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(ssub16)(unsigned int inst, int index) -{ - return INTERPRETER_TRANSLATE(sadd16)(inst, index); -} -ARM_INST_PTR INTERPRETER_TRANSLATE(ssubaddx)(unsigned int inst, int index) -{ - return INTERPRETER_TRANSLATE(sadd16)(inst, index); -} + ARM_INST_PTR INTERPRETER_TRANSLATE(stc)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(stc_inst)); @@ -5039,6 +5082,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) { } SADD8_INST: + SSUB8_INST: SADD16_INST: SADDSUBX_INST: SSUBADDX_INST: @@ -5046,52 +5090,96 @@ unsigned InterpreterMainLoop(ARMul_State* state) { { if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + const u8 op2 = inst_cream->op2; - const s16 rn_lo = (RN & 0xFFFF); - const s16 rn_hi = ((RN >> 16) & 0xFFFF); - const s16 rm_lo = (RM & 0xFFFF); - const s16 rm_hi = ((RM >> 16) & 0xFFFF); + if (op2 == 0x00 || op2 == 0x01 || op2 == 0x02 || op2 == 0x03) { + const s16 rn_lo = (RN & 0xFFFF); + const s16 rn_hi = ((RN >> 16) & 0xFFFF); + const s16 rm_lo = (RM & 0xFFFF); + const s16 rm_hi = ((RM >> 16) & 0xFFFF); - s32 lo_result = 0; - s32 hi_result = 0; + s32 lo_result = 0; + s32 hi_result = 0; - // SADD16 - if (inst_cream->op2 == 0x00) { - lo_result = (rn_lo + rm_lo); - hi_result = (rn_hi + rm_hi); - } - // SASX - else if (inst_cream->op2 == 0x01) { - lo_result = (rn_lo - rm_hi); - hi_result = (rn_hi + rm_lo); - } - // SSAX - else if (inst_cream->op2 == 0x02) { - lo_result = (rn_lo + rm_hi); - hi_result = (rn_hi - rm_lo); - } - // SSUB16 - else if (inst_cream->op2 == 0x03) { - lo_result = (rn_lo - rm_lo); - hi_result = (rn_hi - rm_hi); - } + // SADD16 + if (inst_cream->op2 == 0x00) { + lo_result = (rn_lo + rm_lo); + hi_result = (rn_hi + rm_hi); + } + // SASX + else if (op2 == 0x01) { + lo_result = (rn_lo - rm_hi); + hi_result = (rn_hi + rm_lo); + } + // SSAX + else if (op2 == 0x02) { + lo_result = (rn_lo + rm_hi); + hi_result = (rn_hi - rm_lo); + } + // SSUB16 + else if (op2 == 0x03) { + lo_result = (rn_lo - rm_lo); + hi_result = (rn_hi - rm_hi); + } - RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16); + RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16); - if (lo_result >= 0) { - cpu->Cpsr |= (1 << 16); - cpu->Cpsr |= (1 << 17); - } else { - cpu->Cpsr &= ~(1 << 16); - cpu->Cpsr &= ~(1 << 17); + if (lo_result >= 0) { + cpu->Cpsr |= (1 << 16); + cpu->Cpsr |= (1 << 17); + } else { + cpu->Cpsr &= ~(1 << 16); + cpu->Cpsr &= ~(1 << 17); + } + + if (hi_result >= 0) { + cpu->Cpsr |= (1 << 18); + cpu->Cpsr |= (1 << 19); + } else { + cpu->Cpsr &= ~(1 << 18); + cpu->Cpsr &= ~(1 << 19); + } } + else if (op2 == 0x04 || op2 == 0x07) { + s32 lo_val1, lo_val2; + s32 hi_val1, hi_val2; - if (hi_result >= 0) { - cpu->Cpsr |= (1 << 18); - cpu->Cpsr |= (1 << 19); - } else { - cpu->Cpsr &= ~(1 << 18); - cpu->Cpsr &= ~(1 << 19); + // SADD8 + if (op2 == 0x04) { + lo_val1 = (s32)(s8)(RN & 0xFF) + (s32)(s8)(RM & 0xFF); + lo_val2 = (s32)(s8)((RN >> 8) & 0xFF) + (s32)(s8)((RM >> 8) & 0xFF); + hi_val1 = (s32)(s8)((RN >> 16) & 0xFF) + (s32)(s8)((RM >> 16) & 0xFF); + hi_val2 = (s32)(s8)((RN >> 24) & 0xFF) + (s32)(s8)((RM >> 24) & 0xFF); + } + // SSUB8 + else { + lo_val1 = (s32)(s8)(RN & 0xFF) - (s32)(s8)(RM & 0xFF); + lo_val2 = (s32)(s8)((RN >> 8) & 0xFF) - (s32)(s8)((RM >> 8) & 0xFF); + hi_val1 = (s32)(s8)((RN >> 16) & 0xFF) - (s32)(s8)((RM >> 16) & 0xFF); + hi_val2 = (s32)(s8)((RN >> 24) & 0xFF) - (s32)(s8)((RM >> 24) & 0xFF); + } + + RD = ((lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24)); + + if (lo_val1 >= 0) + cpu->Cpsr |= (1 << 16); + else + cpu->Cpsr &= ~(1 << 16); + + if (lo_val2 >= 0) + cpu->Cpsr |= (1 << 17); + else + cpu->Cpsr &= ~(1 << 17); + + if (hi_val1 >= 0) + cpu->Cpsr |= (1 << 18); + else + cpu->Cpsr &= ~(1 << 18); + + if (hi_val2 >= 0) + cpu->Cpsr |= (1 << 19); + else + cpu->Cpsr &= ~(1 << 19); } } @@ -5176,12 +5264,79 @@ unsigned InterpreterMainLoop(ARMul_State* state) { } SETEND_INST: - SHADD16_INST: + SHADD8_INST: + SHADD16_INST: SHADDSUBX_INST: - SHSUB16_INST: SHSUB8_INST: + SHSUB16_INST: SHSUBADDX_INST: + { + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + const u8 op2 = inst_cream->op2; + const u32 rm_val = RM; + const u32 rn_val = RN; + + if (op2 == 0x00 || op2 == 0x01 || op2 == 0x02 || op2 == 0x03) { + s32 lo_result = 0; + s32 hi_result = 0; + + // SHADD16 + if (op2 == 0x00) { + lo_result = ((s16)(rn_val & 0xFFFF) + (s16)(rm_val & 0xFFFF)) >> 1; + hi_result = ((s16)((rn_val >> 16) & 0xFFFF) + (s16)((rm_val >> 16) & 0xFFFF)) >> 1; + } + // SHASX + else if (op2 == 0x01) { + lo_result = ((s16)(rn_val & 0xFFFF) - (s16)((rm_val >> 16) & 0xFFFF)) >> 1; + hi_result = ((s16)((rn_val >> 16) & 0xFFFF) + (s16)(rm_val & 0xFFFF)) >> 1; + } + // SHSAX + else if (op2 == 0x02) { + lo_result = ((s16)(rn_val & 0xFFFF) + (s16)((rm_val >> 16) & 0xFFFF)) >> 1; + hi_result = ((s16)((rn_val >> 16) & 0xFFFF) - (s16)(rm_val & 0xFFFF)) >> 1; + } + // SHSUB16 + else if (op2 == 0x03) { + lo_result = ((s16)(rn_val & 0xFFFF) - (s16)(rm_val & 0xFFFF)) >> 1; + hi_result = ((s16)((rn_val >> 16) & 0xFFFF) - (s16)((rm_val >> 16) & 0xFFFF)) >> 1; + } + + RD = ((lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16)); + } + else if (op2 == 0x04 || op2 == 0x07) { + s16 lo_val1, lo_val2; + s16 hi_val1, hi_val2; + + // SHADD8 + if (op2 == 0x04) { + lo_val1 = ((s8)(rn_val & 0xFF) + (s8)(rm_val & 0xFF)) >> 1; + lo_val2 = ((s8)((rn_val >> 8) & 0xFF) + (s8)((rm_val >> 8) & 0xFF)) >> 1; + + hi_val1 = ((s8)((rn_val >> 16) & 0xFF) + (s8)((rm_val >> 16) & 0xFF)) >> 1; + hi_val2 = ((s8)((rn_val >> 24) & 0xFF) + (s8)((rm_val >> 24) & 0xFF)) >> 1; + } + // SHSUB8 + else { + lo_val1 = ((s8)(rn_val & 0xFF) - (s8)(rm_val & 0xFF)) >> 1; + lo_val2 = ((s8)((rn_val >> 8) & 0xFF) - (s8)((rm_val >> 8) & 0xFF)) >> 1; + + hi_val1 = ((s8)((rn_val >> 16) & 0xFF) - (s8)((rm_val >> 16) & 0xFF)) >> 1; + hi_val2 = ((s8)((rn_val >> 24) & 0xFF) - (s8)((rm_val >> 24) & 0xFF)) >> 1; + } + + RD = (lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24); + } + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(generic_arm_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + SMLA_INST: { if ((inst_base->cond == 0xe) || CondPassed(cpu, inst_base->cond)) { @@ -5407,7 +5562,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) { FETCH_INST; GOTO_NEXT_INST; } - SSUB8_INST: + STC_INST: { // Instruction not implemented diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp index b9c2aa6c2..43b1ba40e 100644 --- a/src/core/arm/interpreter/armemu.cpp +++ b/src/core/arm/interpreter/armemu.cpp @@ -5881,67 +5881,45 @@ L_stm_s_takeabort: const u32 rm_val = state->Reg[rm_idx]; const u32 rn_val = state->Reg[rn_idx]; - u8 lo_val1; - u8 lo_val2; - u8 hi_val1; - u8 hi_val2; + s32 lo_val1, lo_val2; + s32 hi_val1, hi_val2; // SADD8 if ((instr & 0xFF0) == 0xf90) { - lo_val1 = (u8)((rn_val & 0xFF) + (rm_val & 0xFF)); - lo_val2 = (u8)(((rn_val >> 8) & 0xFF) + ((rm_val >> 8) & 0xFF)); - hi_val1 = (u8)(((rn_val >> 16) & 0xFF) + ((rm_val >> 16) & 0xFF)); - hi_val2 = (u8)(((rn_val >> 24) & 0xFF) + ((rm_val >> 24) & 0xFF)); - - if (lo_val1 & 0x80) - state->GEFlag |= (1 << 16); - else - state->GEFlag &= ~(1 << 16); - - if (lo_val2 & 0x80) - state->GEFlag |= (1 << 17); - else - state->GEFlag &= ~(1 << 17); - - if (hi_val1 & 0x80) - state->GEFlag |= (1 << 18); - else - state->GEFlag &= ~(1 << 18); - - if (hi_val2 & 0x80) - state->GEFlag |= (1 << 19); - else - state->GEFlag &= ~(1 << 19); + lo_val1 = (s32)(s8)(rn_val & 0xFF) + (s32)(s8)(rm_val & 0xFF); + lo_val2 = (s32)(s8)((rn_val >> 8) & 0xFF) + (s32)(s8)((rm_val >> 8) & 0xFF); + hi_val1 = (s32)(s8)((rn_val >> 16) & 0xFF) + (s32)(s8)((rm_val >> 16) & 0xFF); + hi_val2 = (s32)(s8)((rn_val >> 24) & 0xFF) + (s32)(s8)((rm_val >> 24) & 0xFF); } // SSUB8 else { - lo_val1 = (u8)((rn_val & 0xFF) - (rm_val & 0xFF)); - lo_val2 = (u8)(((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF)); - hi_val1 = (u8)(((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF)); - hi_val2 = (u8)(((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF)); + lo_val1 = (s32)(s8)(rn_val & 0xFF) - (s32)(s8)(rm_val & 0xFF); + lo_val2 = (s32)(s8)((rn_val >> 8) & 0xFF) - (s32)(s8)((rm_val >> 8) & 0xFF); + hi_val1 = (s32)(s8)((rn_val >> 16) & 0xFF) - (s32)(s8)((rm_val >> 16) & 0xFF); + hi_val2 = (s32)(s8)((rn_val >> 24) & 0xFF) - (s32)(s8)((rm_val >> 24) & 0xFF); + } - if (!(lo_val1 & 0x80)) - state->GEFlag |= (1 << 16); - else - state->GEFlag &= ~(1 << 16); + if (lo_val1 >= 0) + state->GEFlag |= (1 << 16); + else + state->GEFlag &= ~(1 << 16); - if (!(lo_val2 & 0x80)) - state->GEFlag |= (1 << 17); - else - state->GEFlag &= ~(1 << 17); + if (lo_val2 >= 0) + state->GEFlag |= (1 << 17); + else + state->GEFlag &= ~(1 << 17); - if (!(hi_val1 & 0x80)) - state->GEFlag |= (1 << 18); - else - state->GEFlag &= ~(1 << 18); + if (hi_val1 >= 0) + state->GEFlag |= (1 << 18); + else + state->GEFlag &= ~(1 << 18); - if (!(hi_val2 & 0x80)) - state->GEFlag |= (1 << 19); - else - state->GEFlag &= ~(1 << 19); - } + if (hi_val2 >= 0) + state->GEFlag |= (1 << 19); + else + state->GEFlag &= ~(1 << 19); - state->Reg[rd_idx] = (lo_val1 | lo_val2 << 8 | hi_val1 << 16 | hi_val2 << 24); + state->Reg[rd_idx] = ((lo_val1 & 0xFF) | ((lo_val2 & 0xFF) << 8) | ((hi_val1 & 0xFF) << 16) | ((hi_val2 & 0xFF) << 24)); return 1; } else { diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index dd619cb16..0ff6c6cde 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -94,11 +94,15 @@ inline void Write(u32 addr, const T data) { int r, g, b, a; } source_color = { 0, 0, 0, 0 }; + // Cheap emulation of horizontal scaling: Just skip each second pixel of the + // input framebuffer. We keep track of this in the pixel_skip variable. + unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; + switch (config.input_format) { case Regs::PixelFormat::RGBA8: { // TODO: Most likely got the component order messed up. - u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4; + u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip; source_color.r = srcptr[0]; // blue source_color.g = srcptr[1]; // green source_color.b = srcptr[2]; // red diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 292f496c1..7de055232 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -157,6 +157,9 @@ struct Regs { BitField< 8, 3, PixelFormat> input_format; BitField<12, 3, PixelFormat> output_format; BitField<16, 1, u32> output_tiled; // stores output in a tiled format + + // TODO: Not really sure if this actually scales, or even resizes at all. + BitField<24, 1, u32> scale_horizontally; }; INSERT_PADDING_WORDS(0x1); diff --git a/src/video_core/color.h b/src/video_core/color.h new file mode 100644 index 000000000..e86ac1265 --- /dev/null +++ b/src/video_core/color.h @@ -0,0 +1,32 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Color { + +/// Convert a 1-bit color component to 8 bit +static inline u8 Convert1To8(u8 value) { + return value * 255; +} + +/// Convert a 4-bit color component to 8 bit +static inline u8 Convert4To8(u8 value) { + return (value << 4) | value; +} + +/// Convert a 5-bit color component to 8 bit +static inline u8 Convert5To8(u8 value) { + return (value << 3) | (value >> 2); +} + +/// Convert a 6-bit color component to 8 bit +static inline u8 Convert6To8(u8 value) { + return (value << 2) | (value >> 4); +} + + +} // namespace diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 9602779f4..0d9f4ba66 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -112,6 +112,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Initialize data for the current vertex VertexShader::InputVertex input; + // Load a debugging token to check whether this gets loaded by the running + // application or not. + static const float24 debug_token = float24::FromRawFloat24(0x00abcdef); + input.attr[0].w = debug_token; + for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); @@ -136,6 +141,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } } + // HACK: Some games do not initialize the vertex position's w component. This leads + // to critical issues since it messes up perspective division. As a + // workaround, we force the fourth component to 1.0 if we find this to be the + // case. + // To do this, we additionally have to assume that the first input attribute + // is the vertex position, since there's no information about this other than + // the empiric observation that this is usually the case. + if (input.attr[0].w == debug_token) + input.attr[0].w = float24::FromFloat32(1.0); + if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); @@ -173,6 +188,19 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { break; + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): + { + int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); + auto values = registers.vs_int_uniforms[index]; + VertexShader::GetIntUniform(index) = Math::Vec4<u8>(values.x, values.y, values.z, values.w); + LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", + index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); + break; + } + case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 5921185a6..a494465b9 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -19,6 +19,7 @@ #include "common/log.h" #include "common/file_util.h" +#include "video_core/color.h" #include "video_core/math.h" #include "video_core/pica.h" @@ -359,29 +360,26 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture u8 g = ((source_ptr) >> 6) & 0x1F; u8 b = (source_ptr >> 1) & 0x1F; u8 a = source_ptr & 1; - return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255)); + return Math::MakeVec<u8>(Color::Convert5To8(r), Color::Convert5To8(g), + Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a)); } case Regs::TextureFormat::RGB565: { const u16 source_ptr = *(const u16*)(source + offset * 2); - u8 r = (source_ptr >> 11) & 0x1F; - u8 g = ((source_ptr) >> 5) & 0x3F; - u8 b = (source_ptr) & 0x1F; - return Math::MakeVec<u8>((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255); + u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); + u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); + u8 b = Color::Convert5To8((source_ptr) & 0x1F); + return Math::MakeVec<u8>(r, g, b, 255); } case Regs::TextureFormat::RGBA4: { const u8* source_ptr = source + offset * 2; - u8 r = source_ptr[1] >> 4; - u8 g = source_ptr[1] & 0xFF; - u8 b = source_ptr[0] >> 4; - u8 a = source_ptr[0] & 0xFF; - r = (r << 4) | r; - g = (g << 4) | g; - b = (b << 4) | b; - a = (a << 4) | a; + u8 r = Color::Convert4To8(source_ptr[1] >> 4); + u8 g = Color::Convert4To8(source_ptr[1] & 0xF); + u8 b = Color::Convert4To8(source_ptr[0] >> 4); + u8 a = Color::Convert4To8(source_ptr[0] & 0xF); return { r, g, b, disable_alpha ? (u8)255 : a }; } @@ -389,13 +387,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + offset * 2; - // TODO: component order not verified - if (disable_alpha) { // Show intensity as red, alpha as green - return { source_ptr[0], source_ptr[1], 0, 255 }; + return { source_ptr[1], source_ptr[0], 0, 255 }; } else { - return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]}; + return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; } } @@ -418,14 +414,10 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture case Regs::TextureFormat::IA4: { - const u8* source_ptr = source + offset / 2; - - // TODO: component order not verified + const u8* source_ptr = source + offset; - u8 i = (*source_ptr) & 0xF; - u8 a = ((*source_ptr) & 0xF0) >> 4; - a |= a << 4; - i |= i << 4; + u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); + u8 a = Color::Convert4To8((*source_ptr) & 0xF); if (disable_alpha) { // Show intensity as red, alpha as green @@ -439,15 +431,13 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + offset / 2; - // TODO: component order not verified - u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); - a |= a << 4; + a = Color::Convert4To8(a); if (disable_alpha) { - return { *source_ptr, *source_ptr, *source_ptr, 255 }; + return { a, a, a, 255 }; } else { - return { 0, 0, 0, *source_ptr }; + return { 0, 0, 0, a }; } } diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 38bac748c..f5771ed84 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -50,7 +50,19 @@ struct Regs { u32 trigger_irq; - INSERT_PADDING_WORDS(0x30); + INSERT_PADDING_WORDS(0x2f); + + enum class CullMode : u32 { + // Select which polygons are considered to be "frontfacing". + KeepAll = 0, + KeepClockWise = 1, + KeepCounterClockWise = 2, + // TODO: What does the third value imply? + }; + + union { + BitField<0, 2, CullMode> cull_mode; + }; BitField<0, 24, u32> viewport_size_x; @@ -289,7 +301,7 @@ struct Regs { TevStageConfig tev_stage4; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage5; - INSERT_PADDING_WORDS(0x13); + INSERT_PADDING_WORDS(0x3); const std::array<Regs::TevStageConfig,6> GetTevStages() const { return { tev_stage0, tev_stage1, @@ -298,6 +310,60 @@ struct Regs { }; struct { + enum DepthFunc : u32 { + Always = 1, + LessThan = 4, + GreaterThan = 6, + }; + + union { + // If false, logic blending is used + BitField<8, 1, u32> alphablend_enable; + }; + + union { + enum BlendEquation : u32 { + Add = 0, + }; + + enum BlendFactor : u32 { + Zero = 0, + One = 1, + + SourceAlpha = 6, + OneMinusSourceAlpha = 7, + }; + + BitField< 0, 8, BlendEquation> blend_equation_rgb; + BitField< 8, 8, BlendEquation> blend_equation_a; + + BitField<16, 4, BlendFactor> factor_source_rgb; + BitField<20, 4, BlendFactor> factor_dest_rgb; + + BitField<24, 4, BlendFactor> factor_source_a; + BitField<28, 4, BlendFactor> factor_dest_a; + } alpha_blending; + + union { + enum Op { + Set = 4, + }; + + BitField<0, 4, Op> op; + } logic_op; + + INSERT_PADDING_WORDS(0x4); + + union { + BitField< 0, 1, u32> depth_test_enable; + BitField< 4, 3, DepthFunc> depth_test_func; + BitField<12, 1, u32> depth_write_enable; + }; + + INSERT_PADDING_WORDS(0x8); + } output_merger; + + struct { enum ColorFormat : u32 { RGBA8 = 0, RGB8 = 1, @@ -495,8 +561,14 @@ struct Regs { INSERT_PADDING_WORDS(0x51); BitField<0, 16, u32> vs_bool_uniforms; + union { + BitField< 0, 8, u32> x; + BitField< 8, 8, u32> y; + BitField<16, 8, u32> z; + BitField<24, 8, u32> w; + } vs_int_uniforms[4]; - INSERT_PADDING_WORDS(0x9); + INSERT_PADDING_WORDS(0x5); // Offset to shader program entry point (in words) BitField<0, 16, u32> vs_main_offset; @@ -599,6 +671,7 @@ struct Regs { } while(false) ADD_FIELD(trigger_irq); + ADD_FIELD(cull_mode); ADD_FIELD(viewport_size_x); ADD_FIELD(viewport_size_y); ADD_FIELD(viewport_depth_range); @@ -617,6 +690,7 @@ struct Regs { ADD_FIELD(tev_stage3); ADD_FIELD(tev_stage4); ADD_FIELD(tev_stage5); + ADD_FIELD(output_merger); ADD_FIELD(framebuffer); ADD_FIELD(vertex_attributes); ADD_FIELD(index_array); @@ -625,6 +699,7 @@ struct Regs { ADD_FIELD(trigger_draw_indexed); ADD_FIELD(triangle_topology); ADD_FIELD(vs_bool_uniforms); + ADD_FIELD(vs_int_uniforms); ADD_FIELD(vs_main_offset); ADD_FIELD(vs_input_register_map); ADD_FIELD(vs_uniform_setup); @@ -668,6 +743,7 @@ private: #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") ASSERT_REG_POSITION(trigger_irq, 0x10); +ASSERT_REG_POSITION(cull_mode, 0x40); ASSERT_REG_POSITION(viewport_size_x, 0x41); ASSERT_REG_POSITION(viewport_size_y, 0x43); ASSERT_REG_POSITION(viewport_depth_range, 0x4d); @@ -688,6 +764,7 @@ ASSERT_REG_POSITION(tev_stage2, 0xd0); ASSERT_REG_POSITION(tev_stage3, 0xd8); ASSERT_REG_POSITION(tev_stage4, 0xf0); ASSERT_REG_POSITION(tev_stage5, 0xf8); +ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); @@ -696,6 +773,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); +ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); ASSERT_REG_POSITION(vs_main_offset, 0x2ba); ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a80148872..025d4e484 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -18,51 +18,82 @@ namespace Pica { namespace Rasterizer { static void DrawPixel(int x, int y, const Math::Vec4<u8>& color) { - u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress()))); + const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); + u32* color_buffer = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); // Assuming RGBA8 format until actual framebuffer format handling is implemented *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; } +static const Math::Vec4<u8> GetPixel(int x, int y) { + const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); + u32* color_buffer_u32 = reinterpret_cast<u32*>(Memory::GetPointer(PAddrToVAddr(addr))); + + u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); + Math::Vec4<u8> ret; + ret.a() = value >> 24; + ret.r() = (value >> 16) & 0xFF; + ret.g() = (value >> 8) & 0xFF; + ret.b() = value & 0xFF; + return ret; + } + static u32 GetDepth(int x, int y) { - u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); + const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); + u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); // Assuming 16-bit depth buffer format until actual format handling is implemented return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); } static void SetDepth(int x, int y, u16 value) { - u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); + const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); + u16* depth_buffer = reinterpret_cast<u16*>(Memory::GetPointer(PAddrToVAddr(addr))); // Assuming 16-bit depth buffer format until actual format handling is implemented *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; } -void ProcessTriangle(const VertexShader::OutputVertex& v0, - const VertexShader::OutputVertex& v1, - const VertexShader::OutputVertex& v2) -{ - // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values - struct Fix12P4 { - Fix12P4() {} - Fix12P4(u16 val) : val(val) {} +// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values +struct Fix12P4 { + Fix12P4() {} + Fix12P4(u16 val) : val(val) {} - static u16 FracMask() { return 0xF; } - static u16 IntMask() { return (u16)~0xF; } + static u16 FracMask() { return 0xF; } + static u16 IntMask() { return (u16)~0xF; } - operator u16() const { - return val; - } + operator u16() const { + return val; + } - bool operator < (const Fix12P4& oth) const { - return (u16)*this < (u16)oth; - } + bool operator < (const Fix12P4& oth) const { + return (u16)*this < (u16)oth; + } - private: - u16 val; - }; +private: + u16 val; +}; + +/** + * Calculate signed area of the triangle spanned by the three argument vertices. + * The sign denotes an orientation. + * + * @todo define orientation concretely. + */ +static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, + const Math::Vec2<Fix12P4>& vtx2, + const Math::Vec2<Fix12P4>& vtx3) { + const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); + const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); + // TODO: There is a very small chance this will overflow for sizeof(int) == 4 + return Math::Cross(vec1, vec2).z; +}; +void ProcessTriangle(const VertexShader::OutputVertex& v0, + const VertexShader::OutputVertex& v1, + const VertexShader::OutputVertex& v2) +{ // vertex positions in rasterizer coordinates auto FloatToFix = [](float24 flt) { return Fix12P4(static_cast<unsigned short>(flt.ToFloat32() * 16.0f)); @@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3<float24> vec) { return Math::Vec3<Fix12P4>{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; }; + Math::Vec3<Fix12P4> vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), ScreenToRasterizerCoordinates(v1.screenpos), ScreenToRasterizerCoordinates(v2.screenpos) }; + if (registers.cull_mode == Regs::CullMode::KeepClockWise) { + // Reverse vertex order and use the CCW code path. + std::swap(vtxpos[1], vtxpos[2]); + } + + if (registers.cull_mode != Regs::CullMode::KeepAll) { + // Cull away triangles which are wound clockwise. + // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll + if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) + return; + } + // TODO: Proper scissor rect test! u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); @@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, for (u16 x = min_x; x < max_x; x += 0x10) { // Calculate the barycentric coordinates w0, w1 and w2 - auto orient2d = [](const Math::Vec2<Fix12P4>& vtx1, - const Math::Vec2<Fix12P4>& vtx2, - const Math::Vec2<Fix12P4>& vtx3) { - const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); - const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); - // TODO: There is a very small chance this will overflow for sizeof(int) == 4 - return Math::Cross(vec1, vec2).z; - }; - - int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); - int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); - int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); + int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); + int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); + int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); int wsum = w0 + w1 + w2; // If current pixel is not covered by the current primitive @@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, return 0; } }; - s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); - t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); + s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); + t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); @@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { + static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { switch (factor) { case ColorModifier::SourceColor: return values.rgb(); + case ColorModifier::OneMinusSourceColor: + return (Math::Vec3<u8>(255, 255, 255) - values.rgb()).Cast<u8>(); + case ColorModifier::SourceAlpha: return { values.a(), values.a(), values.a() }; @@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { + static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { switch (factor) { case AlphaModifier::SourceAlpha: return value; @@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { + static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { switch (op) { case Operation::Replace: return input[0]; @@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Lerp: return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); + case Operation::Subtract: + { + auto result = input[0].Cast<int>() - input[1].Cast<int>(); + result.r() = std::max(0, result.r()); + result.g() = std::max(0, result.g()); + result.b() = std::max(0, result.b()); + return result.Cast<u8>(); + } + default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); _dbg_assert_(HW_GPU, 0); @@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { + static auto AlphaCombine = [](Operation op, const std::array<u8,3>& input) -> u8 { switch (op) { case Operation::Replace: return input[0]; @@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Lerp: return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; + case Operation::Subtract: + return std::max(0, (int)input[0] - (int)input[1]); + default: LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); _dbg_assert_(HW_GPU, 0); @@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, combiner_output = Math::MakeVec(color_output, alpha_output); } - // TODO: Not sure if the multiplication by 65535 has already been taken care - // of when transforming to screen coordinates or not. - u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + - (float)v1.screenpos[2].ToFloat32() * w1 + - (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); - SetDepth(x >> 4, y >> 4, z); + // TODO: Does depth indeed only get written even if depth testing is enabled? + if (registers.output_merger.depth_test_enable) { + u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); + u16 ref_z = GetDepth(x >> 4, y >> 4); + + bool pass = false; + + switch (registers.output_merger.depth_test_func) { + case registers.output_merger.Always: + pass = true; + break; + + case registers.output_merger.LessThan: + pass = z < ref_z; + break; + + case registers.output_merger.GreaterThan: + pass = z > ref_z; + break; + + default: + LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value()); + break; + } + + if (!pass) + continue; + + if (registers.output_merger.depth_write_enable) + SetDepth(x >> 4, y >> 4, z); + } + + auto dest = GetPixel(x >> 4, y >> 4); + + if (registers.output_merger.alphablend_enable) { + auto params = registers.output_merger.alpha_blending; + + auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3<u8> { + switch(factor) { + case params.Zero: + return Math::Vec3<u8>(0, 0, 0); + + case params.One: + return Math::Vec3<u8>(255, 255, 255); + + case params.SourceAlpha: + return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a()); + + case params.OneMinusSourceAlpha: + return Math::Vec3<u8>(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a()); + + default: + LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); + exit(0); + break; + } + }; + + auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { + switch(factor) { + case params.Zero: + return 0; + + case params.One: + return 255; + + case params.SourceAlpha: + return combiner_output.a(); + + case params.OneMinusSourceAlpha: + return 255 - combiner_output.a(); + + default: + LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); + exit(0); + break; + } + }; + + auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), + LookupFactorA(params.factor_source_a)); + auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), + LookupFactorA(params.factor_dest_a)); + + switch (params.blend_equation_rgb) { + case params.Add: + { + auto result = (combiner_output * srcfactor + dest * dstfactor) / 255; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + combiner_output = result.Cast<u8>(); + break; + } + + default: + LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value()); + exit(0); + } + } else { + LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); + exit(0); + } DrawPixel(x >> 4, y >> 4, combiner_output); } diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 63ebccbde..6fd640425 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -8,32 +8,6 @@ #include "common/common_types.h" -namespace FormatPrecision { - -/// Adjust RGBA8 color with RGBA6 precision -static inline u32 rgba8_with_rgba6(u32 src) { - u32 color = src; - color &= 0xFCFCFCFC; - color |= (color >> 6) & 0x03030303; - return color; -} - -/// Adjust RGBA8 color with RGB565 precision -static inline u32 rgba8_with_rgb565(u32 src) { - u32 color = (src & 0xF8FCF8); - color |= (color >> 5) & 0x070007; - color |= (color >> 6) & 0x000300; - color |= 0xFF000000; - return color; -} - -/// Adjust Z24 depth value with Z16 precision -static inline u32 z24_with_z16(u32 src) { - return (src & 0xFFFF00) | (src >> 16); -} - -} // namespace - namespace VideoCore { /// Structure for the TGA texture format (for dumping) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index bed5081a0..ff825e2e1 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -30,6 +30,8 @@ static struct { Math::Vec4<float24> f[96]; std::array<bool,16> b; + + std::array<Math::Vec4<u8>,4> i; } shader_uniforms; // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! @@ -37,33 +39,31 @@ static struct { static std::array<u32, 1024> shader_memory; static std::array<u32, 1024> swizzle_data; -void SubmitShaderMemoryChange(u32 addr, u32 value) -{ +void SubmitShaderMemoryChange(u32 addr, u32 value) { shader_memory[addr] = value; } -void SubmitSwizzleDataChange(u32 addr, u32 value) -{ +void SubmitSwizzleDataChange(u32 addr, u32 value) { swizzle_data[addr] = value; } -Math::Vec4<float24>& GetFloatUniform(u32 index) -{ +Math::Vec4<float24>& GetFloatUniform(u32 index) { return shader_uniforms.f[index]; } -bool& GetBoolUniform(u32 index) -{ +bool& GetBoolUniform(u32 index) { return shader_uniforms.b[index]; } -const std::array<u32, 1024>& GetShaderBinary() -{ +Math::Vec4<u8>& GetIntUniform(u32 index) { + return shader_uniforms.i[index]; +} + +const std::array<u32, 1024>& GetShaderBinary() { return shader_memory; } -const std::array<u32, 1024>& GetSwizzlePatterns() -{ +const std::array<u32, 1024>& GetSwizzlePatterns() { return swizzle_data; } @@ -437,8 +437,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } } -OutputVertex RunShader(const InputVertex& input, int num_attributes) -{ +OutputVertex RunShader(const InputVertex& input, int num_attributes) { VertexShaderState state; const u32* main = &shader_memory[registers.vs_main_offset]; diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index af3fb2a2f..3a68a3409 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -73,6 +73,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes); Math::Vec4<float24>& GetFloatUniform(u32 index); bool& GetBoolUniform(u32 index); +Math::Vec4<u8>& GetIntUniform(u32 index); const std::array<u32, 1024>& GetShaderBinary(); const std::array<u32, 1024>& GetSwizzlePatterns(); |