1 files changed, 270 insertions, 0 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
new file mode 100644
index 000000000..93830a96a
--- /dev/null
+++ b/src/video_core/vertex_shader.cpp
@@ -0,0 +1,270 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include "pica.h"
+#include "vertex_shader.h"
+#include <core/mem_map.h>
+#include <common/file_util.h>
+
+namespace Pica {
+
+namespace VertexShader {
+
+static struct {
+    Math::Vec4<float24> f[96]; // Float uniform registers; GetFloatUniform() hands out entries by index
+} shader_uniforms; // File-local; ProcessShaderCode reads these as source operands 0x20 and up
+
+
+// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to!
+// For now, we just keep these local arrays around.
+static u32 shader_memory[1024]; // Shader program words; filled by SubmitShaderMemoryChange, executed by ProcessShaderCode
+static u32 swizzle_data[1024];  // Operand descriptor words; filled by SubmitSwizzleDataChange, read as SwizzlePattern
+
+// Stores one word of the shader program.
+//
+// addr  - word index into the local shader_memory array (not a byte address)
+// value - word to store
+//
+// Indices beyond the array are logged and ignored rather than written out of bounds.
+void SubmitShaderMemoryChange(u32 addr, u32 value)
+{
+    if (addr >= sizeof(shader_memory) / sizeof(shader_memory[0])) {
+        ERROR_LOG(GPU, "Shader memory write out of range: index 0x%x", addr);
+        return;
+    }
+
+    shader_memory[addr] = value;
+}
+
+// Stores one operand descriptor (swizzle pattern) word.
+//
+// addr  - word index into the local swizzle_data array (not a byte address)
+// value - word to store
+//
+// Indices beyond the array are logged and ignored rather than written out of bounds.
+void SubmitSwizzleDataChange(u32 addr, u32 value)
+{
+    if (addr >= sizeof(swizzle_data) / sizeof(swizzle_data[0])) {
+        ERROR_LOG(GPU, "Swizzle data write out of range: index 0x%x", addr);
+        return;
+    }
+
+    swizzle_data[addr] = value;
+}
+
+// Returns a writable reference to the float uniform register with the given index.
+//
+// An index beyond the uniform array is logged and redirected to a scratch register, so that
+// the caller's write cannot corrupt unrelated memory. Shaders never read the scratch register.
+Math::Vec4<float24>& GetFloatUniform(u32 index)
+{
+    if (index >= sizeof(shader_uniforms.f) / sizeof(shader_uniforms.f[0])) {
+        // A reference must be returned in any case, so hand out a throwaway register
+        static Math::Vec4<float24> out_of_range_uniform;
+
+        ERROR_LOG(GPU, "Float uniform index out of range: %u", index);
+        return out_of_range_uniform;
+    }
+
+    return shader_uniforms.f[index];
+}
+
+struct VertexShaderState { // Execution state of one shader run; set up by RunShader, advanced by ProcessShaderCode
+    u32* program_counter; // Instruction word to execute next; points into shader_memory
+
+    const float24* input_register_table[16]; // Per input register: pointer that is read at swizzle-selected component offsets
+    float24* output_register_table[7*4]; // One pointer per output register component (RunShader fills 7 registers x 4)
+
+    Math::Vec4<float24> temporary_registers[16];
+    bool status_registers[2]; // Cleared by RunShader; ProcessShaderCode does not use them yet
+
+    enum {
+        INVALID_ADDRESS = 0xFFFFFFFF // Marks unused call stack slots; RET on such a slot ends execution
+    };
+    u32 call_stack[8]; // TODO: What is the maximal call stack depth?
+    u32* call_stack_pointer; // Current top of call_stack; CALL pre-increments it, so call_stack[0] is never overwritten
+};
+
+// Interprets vertex shader instructions, starting at state.program_counter.
+//
+// Execution ends when RET is reached while the call stack is empty. As a safety net it also
+// ends (with an error log) when the program counter leaves shader_memory or when CALL would
+// overflow the call stack.
+//
+// Operand handling:
+//  - Source operands are read through the swizzle selectors of the operand descriptor.
+//  - Source fields that select no known register read as zero.
+//  - Destination fields that select no known register are redirected to a scratch buffer, i.e.
+//    the result is discarded. Operands are decoded for every instruction (including flow
+//    control ones, whose bits mean something else), so this is deliberately silent.
+//  - Results are written to dest[0..3]: the four destination components are assumed to be
+//    contiguous in memory, starting at the address stored for the destination.
+static void ProcessShaderCode(VertexShaderState& state) {
+    const u32* const shader_memory_end = shader_memory + sizeof(shader_memory) / sizeof(shader_memory[0]);
+
+    // call_stack[0] is the bottom marker and never holds a return address
+    const int max_call_depth = sizeof(state.call_stack) / sizeof(state.call_stack[0]) - 1;
+
+    // Stand-ins for operand fields that do not map to any register
+    const float24 zero = float24::FromFloat32(0.f);
+    const float24 zero_source[4] = { zero, zero, zero, zero };
+    float24 discarded_dest[4];
+
+    while (true) {
+        // Never decode anything outside of the shader binary
+        if (state.program_counter < shader_memory || state.program_counter >= shader_memory_end) {
+            ERROR_LOG(GPU, "Shader program counter left shader memory (word offset %d), aborting shader",
+                      (int)(state.program_counter - shader_memory));
+            break;
+        }
+
+        bool increment_pc = true;
+        bool exit_loop = false;
+        const Instruction& instr = *(const Instruction*)state.program_counter;
+
+        // 0x00-0x0F: input registers, 0x10-0x1F: temporary registers, 0x20-0x7F: float uniforms
+        const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
+                             : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
+                             : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1-0x20].x
+                             : &zero_source[0];
+        // src2 can only address input and temporary registers
+        const float24* src2_ = (instr.common.src2 < 0x10) ? state.input_register_table[instr.common.src2]
+                             : (instr.common.src2 < 0x20) ? &state.temporary_registers[instr.common.src2-0x10].x
+                             : &zero_source[0];
+        // TODO: Unsure about the limit values
+        // 0x00-0x1B: output register components (the table has 7*4 = 0x1C entries),
+        // 0x40-0x7C: temporary register components, everything else is unmapped.
+        float24* dest = (instr.common.dest < 0x1C) ? state.output_register_table[instr.common.dest]
+                      : (instr.common.dest < 0x40) ? &discarded_dest[0]
+                      : (instr.common.dest <= 0x7C) ? &state.temporary_registers[(instr.common.dest-0x40)/4][instr.common.dest%4]
+                      : &discarded_dest[0];
+
+        const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id];
+
+        // Apply the source swizzles up front so that the opcode handlers can work component-wise
+        const float24 src1[4] = {
+            src1_[(int)swizzle.GetSelectorSrc1(0)],
+            src1_[(int)swizzle.GetSelectorSrc1(1)],
+            src1_[(int)swizzle.GetSelectorSrc1(2)],
+            src1_[(int)swizzle.GetSelectorSrc1(3)],
+        };
+        const float24 src2[4] = {
+            src2_[(int)swizzle.GetSelectorSrc2(0)],
+            src2_[(int)swizzle.GetSelectorSrc2(1)],
+            src2_[(int)swizzle.GetSelectorSrc2(2)],
+            src2_[(int)swizzle.GetSelectorSrc2(3)],
+        };
+
+        switch (instr.opcode) {
+            case Instruction::OpCode::ADD:
+            {
+                for (int i = 0; i < 4; ++i) {
+                    if (!swizzle.DestComponentEnabled(i))
+                        continue;
+
+                    dest[i] = src1[i] + src2[i];
+                }
+
+                break;
+            }
+
+            case Instruction::OpCode::MUL:
+            {
+                for (int i = 0; i < 4; ++i) {
+                    if (!swizzle.DestComponentEnabled(i))
+                        continue;
+
+                    dest[i] = src1[i] * src2[i];
+                }
+
+                break;
+            }
+
+            case Instruction::OpCode::DP3:
+            case Instruction::OpCode::DP4:
+            {
+                float24 dot = float24::FromFloat32(0.f);
+                int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
+                for (int i = 0; i < num_components; ++i)
+                    dot = dot + src1[i] * src2[i];
+
+                // The scalar result goes to every enabled destination component. This is
+                // independent of how many source components took part in the dot product.
+                for (int i = 0; i < 4; ++i) {
+                    if (!swizzle.DestComponentEnabled(i))
+                        continue;
+
+                    dest[i] = dot;
+                }
+                break;
+            }
+
+            // Reciprocal
+            case Instruction::OpCode::RCP:
+            {
+                for (int i = 0; i < 4; ++i) {
+                    if (!swizzle.DestComponentEnabled(i))
+                        continue;
+
+                    // TODO: Be stable against division by zero!
+                    // TODO: I think this might be wrong... we should only use one component here
+                    dest[i] = float24::FromFloat32(1.0 / src1[i].ToFloat32());
+                }
+
+                break;
+            }
+
+            // Reciprocal Square Root
+            case Instruction::OpCode::RSQ:
+            {
+                for (int i = 0; i < 4; ++i) {
+                    if (!swizzle.DestComponentEnabled(i))
+                        continue;
+
+                    // TODO: Be stable against division by zero!
+                    // TODO: I think this might be wrong... we should only use one component here
+                    dest[i] = float24::FromFloat32(1.0 / sqrt(src1[i].ToFloat32()));
+                }
+
+                break;
+            }
+
+            case Instruction::OpCode::MOV:
+            {
+                for (int i = 0; i < 4; ++i) {
+                    if (!swizzle.DestComponentEnabled(i))
+                        continue;
+
+                    dest[i] = src1[i];
+                }
+                break;
+            }
+
+            case Instruction::OpCode::RET:
+                if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) {
+                    // Nothing to return to: this RET ends the shader
+                    exit_loop = true;
+                } else {
+                    // Jump back to the CALL instruction (the increment below then moves past it),
+                    // invalidate the slot that was just used and only then pop it. The slot
+                    // below still holds the return address of the enclosing call.
+                    state.program_counter = &shader_memory[*state.call_stack_pointer];
+                    *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS;
+                }
+
+                break;
+
+            case Instruction::OpCode::CALL:
+                // Checked at runtime (not only in debug builds), since the nesting depth is
+                // controlled by the emulated program.
+                if (state.call_stack_pointer - state.call_stack >= max_call_depth) {
+                    ERROR_LOG(GPU, "Shader call stack overflow at word offset 0x%x, aborting shader",
+                              (unsigned int)(state.program_counter - shader_memory));
+                    exit_loop = true;
+                    break;
+                }
+
+                increment_pc = false;
+
+                // Remember the position of this CALL; RET resumes at the instruction after it
+                *++state.call_stack_pointer = state.program_counter - shader_memory;
+                // TODO: Does this offset refer to the beginning of shader memory?
+                state.program_counter = &shader_memory[instr.flow_control.offset_words];
+                break;
+
+            case Instruction::OpCode::FLS:
+                // TODO: Do whatever needs to be done here?
+                break;
+
+            default:
+                ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
+                          (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex);
+                break;
+        }
+
+        if (increment_pc)
+            ++state.program_counter;
+
+        if (exit_loop)
+            break;
+    }
+}
+
+// Runs the currently loaded vertex shader on a single vertex.
+//
+// input          - vertex attributes as loaded from the vertex buffers
+// num_attributes - number of valid entries in input.attr; at most 16 are used
+//
+// Returns the vertex assembled from the shader's output registers, using the semantic mapping
+// in registers.vs_output_attributes.
+//
+// Input registers without an attribute read as zero. Output components whose semantic does not
+// fall inside OutputVertex are redirected to a scratch buffer and thus discarded.
+// NOTE: The interpreter writes the components of a destination contiguously, starting at the
+// address stored for it, so the bounds check here only covers the starting address.
+OutputVertex RunShader(const InputVertex& input, int num_attributes)
+{
+    VertexShaderState state;
+
+    state.program_counter = &shader_memory[registers.vs_main_offset];
+
+    // Setup input register table
+    // Unmapped registers point to a full four-component zero vector, since the interpreter
+    // reads swizzled components (offsets 0..3) through these pointers.
+    const auto& attribute_register_map = registers.vs_input_register_map;
+    const float24 zero = float24::FromFloat32(0.f);
+    const float24 dummy_register[4] = { zero, zero, zero, zero };
+    std::fill(state.input_register_table, state.input_register_table + 16, &dummy_register[0]);
+    if (num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
+    if (num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
+    if (num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
+    if (num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
+    if (num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
+    if (num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
+    if (num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
+    if (num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
+    if (num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
+    if (num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
+    if (num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
+    if (num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
+    if (num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
+    if (num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
+    if (num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
+    if (num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
+
+    // Setup output register table
+    // Each semantic is the index of a float24 within OutputVertex.
+    OutputVertex ret;
+    const u32 num_output_floats = sizeof(ret) / sizeof(float24);
+    float24 discarded_output[4]; // Sink for components that are not mapped into the vertex
+    for (int i = 0; i < 7; ++i) {
+        const auto& output_register_map = registers.vs_output_attributes[i];
+
+        u32 semantics[4] = {
+            output_register_map.map_x, output_register_map.map_y,
+            output_register_map.map_z, output_register_map.map_w
+        };
+
+        for (int comp = 0; comp < 4; ++comp) {
+            if (semantics[comp] < num_output_floats)
+                state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
+            else
+                state.output_register_table[4*i+comp] = &discarded_output[0];
+        }
+    }
+
+    // Start with cleared status flags and an empty call stack
+    state.status_registers[0] = false;
+    state.status_registers[1] = false;
+    std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]),
+              VertexShaderState::INVALID_ADDRESS);
+    state.call_stack_pointer = &state.call_stack[0];
+
+    ProcessShaderCode(state);
+
+    DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
+        ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
+        ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
+        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
+
+    return ret;
+}
+
+
+} // namespace VertexShader
+
+} // namespace Pica