diff options
Diffstat (limited to 'src/video_core/shader/shader_jit_x64.cpp')
-rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 151 |
1 files changed, 127 insertions, 24 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 836942c6b..cc66fc8d6 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -23,14 +23,14 @@ const JitFunction instr_table[64] = { &JitCompiler::Compile_ADD, // add &JitCompiler::Compile_DP3, // dp3 &JitCompiler::Compile_DP4, // dp4 - nullptr, // dph + &JitCompiler::Compile_DPH, // dph nullptr, // unknown - nullptr, // ex2 - nullptr, // lg2 + &JitCompiler::Compile_EX2, // ex2 + &JitCompiler::Compile_LG2, // lg2 nullptr, // unknown &JitCompiler::Compile_MUL, // mul - nullptr, // lge - nullptr, // slt + &JitCompiler::Compile_SGE, // sge + &JitCompiler::Compile_SLT, // slt &JitCompiler::Compile_FLR, // flr &JitCompiler::Compile_MAX, // max &JitCompiler::Compile_MIN, // min @@ -44,10 +44,10 @@ const JitFunction instr_table[64] = { nullptr, // unknown nullptr, // unknown nullptr, // unknown - nullptr, // dphi + &JitCompiler::Compile_DPH, // dphi nullptr, // unknown - nullptr, // sgei - &JitCompiler::Compile_SLTI, // slti + &JitCompiler::Compile_SGE, // sgei + &JitCompiler::Compile_SLT, // slti nullptr, // unknown nullptr, // unknown nullptr, // unknown @@ -280,6 +280,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) { CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); } +void JitCompiler::Compile_PushCallerSavedXMM() { +#ifndef _WIN32 + SUB(64, R(RSP), Imm8(2 * 16)); + MOVUPS(MDisp(RSP, 16), ONE); + MOVUPS(MDisp(RSP, 0), NEGBIT); +#endif +} + +void JitCompiler::Compile_PopCallerSavedXMM() { +#ifndef _WIN32 + MOVUPS(NEGBIT, MDisp(RSP, 0)); + MOVUPS(ONE, MDisp(RSP, 16)); + ADD(64, R(RSP), Imm8(2 * 16)); +#endif +} + void JitCompiler::Compile_ADD(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); @@ -331,6 +347,71 @@ void JitCompiler::Compile_DP4(Instruction instr) { Compile_DestEnable(instr, SRC1); } +void 
JitCompiler::Compile_DPH(Instruction instr) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { + Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); + } else { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + } + + if (Common::GetCPUCaps().sse4_1) { + // Set 4th component to 1.0 + BLENDPS(SRC1, R(ONE), 0x8); // 0b1000 + DPPS(SRC1, R(SRC2), 0xff); + } else { + // Reverse to set the 4th component to 1.0 + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); + MOVSS(SRC1, R(ONE)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); + + MULPS(SRC1, R(SRC2)); + + MOVAPS(SRC2, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY + ADDPS(SRC1, R(SRC2)); + + MOVAPS(SRC2, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX + ADDPS(SRC1, R(SRC2)); + } + + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_EX2(Instruction instr) { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + MOVSS(XMM0, R(SRC1)); + + // The following will actually break the stack alignment + ABI_PushAllCallerSavedRegsAndAdjustStack(); + Compile_PushCallerSavedXMM(); + ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); + Compile_PopCallerSavedXMM(); + ABI_PopAllCallerSavedRegsAndAdjustStack(); + + SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); + MOVAPS(SRC1, R(XMM0)); + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_LG2(Instruction instr) { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + MOVSS(XMM0, R(SRC1)); + + // The following will actually break the stack alignment + ABI_PushAllCallerSavedRegsAndAdjustStack(); + Compile_PushCallerSavedXMM(); + ABI_CallFunction(reinterpret_cast<const void*>(log2f)); + Compile_PopCallerSavedXMM(); + ABI_PopAllCallerSavedRegsAndAdjustStack(); + + SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); + MOVAPS(SRC1, R(XMM0)); + 
Compile_DestEnable(instr, SRC1); +} + void JitCompiler::Compile_MUL(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); @@ -338,6 +419,36 @@ void JitCompiler::Compile_MUL(Instruction instr) { Compile_DestEnable(instr, SRC1); } +void JitCompiler::Compile_SGE(Instruction instr) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { + Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); + } else { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + } + + CMPPS(SRC1, R(SRC2), CMP_NLT); + ANDPS(SRC1, R(ONE)); + + Compile_DestEnable(instr, SRC1); +} + +void JitCompiler::Compile_SLT(Instruction instr) { + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { + Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); + } else { + Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); + Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); + } + + CMPPS(SRC1, R(SRC2), CMP_LT); + ANDPS(SRC1, R(ONE)); + + Compile_DestEnable(instr, SRC1); +} + void JitCompiler::Compile_FLR(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); @@ -415,22 +526,13 @@ void JitCompiler::Compile_MOV(Instruction instr) { Compile_DestEnable(instr, SRC1); } -void JitCompiler::Compile_SLTI(Instruction instr) { - Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); - Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2); - - CMPSS(SRC1, R(SRC2), CMP_LT); - ANDPS(SRC1, R(ONE)); - - Compile_DestEnable(instr, SRC1); -} - void JitCompiler::Compile_RCP(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); - // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica + // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems 
if Pica // performs this operation more accurately. This should be checked on hardware. - RCPPS(SRC1, R(SRC1)); + RCPSS(SRC1, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX Compile_DestEnable(instr, SRC1); } @@ -438,9 +540,10 @@ void JitCompiler::Compile_RCP(Instruction instr) { void JitCompiler::Compile_RSQ(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); - // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica + // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica // performs this operation more accurately. This should be checked on hardware. - RSQRTPS(SRC1, R(SRC1)); + RSQRTSS(SRC1, R(SRC1)); + SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX Compile_DestEnable(instr, SRC1); } @@ -646,12 +749,12 @@ CompiledShader* JitCompiler::Compile() { // Used to set a register to one static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; MOV(PTRBITS, R(RAX), ImmPtr(&one)); - MOVAPS(ONE, MDisp(RAX, 0)); + MOVAPS(ONE, MatR(RAX)); // Used to negate registers static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; MOV(PTRBITS, R(RAX), ImmPtr(&neg)); - MOVAPS(NEGBIT, MDisp(RAX, 0)); + MOVAPS(NEGBIT, MatR(RAX)); looping = false; |