diff options
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- | src/video_core/shader/shader.cpp | 41 | ||||
-rw-r--r-- | src/video_core/shader/shader.h | 49 | ||||
-rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 16 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.cpp | 49 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.h | 2 |
5 files changed, 153 insertions, 4 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 67ed19ba8..e9063e616 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -21,7 +21,8 @@ namespace Pica { namespace Shader { -OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { +OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, + const AttributeBuffer& input) { // Setup output data union { OutputVertex ret{}; @@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { } } +UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} + +GSEmitter::GSEmitter() { + handlers = new Handlers; +} + +GSEmitter::~GSEmitter() { + delete handlers; +} + +void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { + ASSERT(vertex_id < 3); + std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); + if (prim_emit) { + if (winding) + handlers->winding_setter(); + for (size_t i = 0; i < buffer.size(); ++i) { + AttributeBuffer output; + unsigned int output_i = 0; + for (unsigned int reg : Common::BitSet<u32>(output_mask)) { + output.attr[output_i++] = buffer[i][reg]; + } + handlers->vertex_handler(output); + } + } +} + +GSUnitState::GSUnitState() : UnitState(&emitter) {} + +void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { + emitter.handlers->vertex_handler = std::move(vertex_handler); + emitter.handlers->winding_setter = std::move(winding_setter); +} + +void GSUnitState::ConfigOutput(const ShaderRegs& config) { + emitter.output_mask = config.output_mask; +} + MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); #ifdef ARCHITECTURE_x86_64 diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index e156f6aef..a3789da01 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -6,6 +6,7 @@ #include <array> #include <cstddef> +#include <functional> #include <type_traits> #include <nihstro/shader_bytecode.h> #include "common/assert.h" @@ -31,6 +32,12 @@ struct AttributeBuffer { alignas(16) Math::Vec4<float24> attr[16]; }; +/// Handler type for receiving vertex outputs from vertex shader or geometry shader +using VertexHandler = std::function<void(const AttributeBuffer&)>; + +/// Handler type for signaling to invert the vertex order of the next triangle +using WindingSetter = std::function<void()>; + struct OutputVertex { Math::Vec4<float24> pos; Math::Vec4<float24> quat; @@ -43,7 +50,8 @@ struct OutputVertex { INSERT_PADDING_WORDS(1); Math::Vec2<float24> tc2; - static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); + static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, + const AttributeBuffer& output); }; #define ASSERT_POS(var, pos) \ static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \ @@ -61,12 +69,36 @@ static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); /** + * This structure contains state information for primitive emitting in geometry shader. + */ +struct GSEmitter { + std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer; + u8 vertex_id; + bool prim_emit; + bool winding; + u32 output_mask; + + // Function objects are hidden behind a raw pointer to make the structure standard layout type, + // for JIT to use offsetof to access other members. + struct Handlers { + VertexHandler vertex_handler; + WindingSetter winding_setter; + } * handlers; + + GSEmitter(); + ~GSEmitter(); + void Emit(Math::Vec4<float24> (&vertex)[16]); +}; +static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type"); + +/** * This structure contains the state information that needs to be unique for a shader unit. The 3DS * has four shader units that process shaders in parallel. At the present, Citra only implements a * single shader unit that processes all shaders serially. Putting the state information in a struct * here will make it easier for us to parallelize the shader processing later. */ struct UnitState { + explicit UnitState(GSEmitter* emitter = nullptr); struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore // required to be 16-byte aligned. @@ -82,6 +114,8 @@ struct UnitState { // TODO: How many bits do these actually have? s32 address_registers[3]; + GSEmitter* emitter_ptr; + static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: @@ -125,6 +159,19 @@ struct UnitState { void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); }; +/** + * This is an extended shader unit state that represents the special unit that can run both vertex + * shader and geometry shader. It contains an additional primitive emitter and utilities for + * geometry shader. + */ +struct GSUnitState : public UnitState { + GSUnitState(); + void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); + void ConfigOutput(const ShaderRegs& config); + + GSEmitter emitter; +}; + struct ShaderSetup { struct { // The float uniforms are accessed by the shader JIT using SSE instructions, and are diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 206c0978a..9d4da4904 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData break; } + case OpCode::Id::EMIT: { + GSEmitter* emitter = state.emitter_ptr; + ASSERT_MSG(emitter, "Execute EMIT on VS"); + emitter->Emit(state.registers.output); + break; + } + + case OpCode::Id::SETEMIT: { + GSEmitter* emitter = state.emitter_ptr; + ASSERT_MSG(emitter, "Execute SETEMIT on VS"); + emitter->vertex_id = instr.setemit.vertex_id; + emitter->prim_emit = instr.setemit.prim_emit != 0; + emitter->winding = instr.setemit.winding != 0; + break; + } + default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value().EffectiveOpCode(), diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 42a57aab1..1b31623bd 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -75,8 +75,8 @@ const JitFunction instr_table[64] = { &JitShader::Compile_IF, // ifu &JitShader::Compile_IF, // ifc &JitShader::Compile_LOOP, // loop - nullptr, // emit - nullptr, // sete + &JitShader::Compile_EMIT, // emit + &JitShader::Compile_SETE, // sete &JitShader::Compile_JMP, // jmpc &JitShader::Compile_JMP, // jmpu &JitShader::Compile_CMP, // cmp @@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) { } } +static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) { + emitter->Emit(*output); +} + +void JitShader::Compile_EMIT(Instruction instr) { + Label have_emitter, end; + mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + test(rax, rax); + jnz(have_emitter); + + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS")); + CallFarFunction(*this, LogCritical); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + jmp(end); + + L(have_emitter); + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, rax); + mov(ABI_PARAM2, STATE); + add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output))); + CallFarFunction(*this, Emit); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + L(end); +} + +void JitShader::Compile_SETE(Instruction instr) { + Label have_emitter, end; + mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); + test(rax, rax); + jnz(have_emitter); + + ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS")); + CallFarFunction(*this, LogCritical); + ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + jmp(end); + + L(have_emitter); + mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); + mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); + mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); + L(end); +} + void JitShader::Compile_Block(unsigned end) { while (program_counter < end) { Compile_NextInstr(); diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index 31af0ca48..4aee56b1d 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -66,6 +66,8 @@ public: void Compile_JMP(Instruction instr); void Compile_CMP(Instruction instr); void Compile_MAD(Instruction instr); + void Compile_EMIT(Instruction instr); + void Compile_SETE(Instruction instr); private: void Compile_Block(unsigned end); |