summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader/shader_jit_x64.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader/shader_jit_x64.cpp')
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp151
1 files changed, 127 insertions, 24 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 836942c6b..cc66fc8d6 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -23,14 +23,14 @@ const JitFunction instr_table[64] = {
&JitCompiler::Compile_ADD, // add
&JitCompiler::Compile_DP3, // dp3
&JitCompiler::Compile_DP4, // dp4
- nullptr, // dph
+ &JitCompiler::Compile_DPH, // dph
nullptr, // unknown
- nullptr, // ex2
- nullptr, // lg2
+ &JitCompiler::Compile_EX2, // ex2
+ &JitCompiler::Compile_LG2, // lg2
nullptr, // unknown
&JitCompiler::Compile_MUL, // mul
- nullptr, // lge
- nullptr, // slt
+ &JitCompiler::Compile_SGE, // sge
+ &JitCompiler::Compile_SLT, // slt
&JitCompiler::Compile_FLR, // flr
&JitCompiler::Compile_MAX, // max
&JitCompiler::Compile_MIN, // min
@@ -44,10 +44,10 @@ const JitFunction instr_table[64] = {
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
- nullptr, // dphi
+ &JitCompiler::Compile_DPH, // dphi
nullptr, // unknown
- nullptr, // sgei
- &JitCompiler::Compile_SLTI, // slti
+ &JitCompiler::Compile_SGE, // sgei
+ &JitCompiler::Compile_SLT, // slti
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
@@ -280,6 +280,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
}
+void JitCompiler::Compile_PushCallerSavedXMM() {
+#ifndef _WIN32
+ SUB(64, R(RSP), Imm8(2 * 16));
+ MOVUPS(MDisp(RSP, 16), ONE);
+ MOVUPS(MDisp(RSP, 0), NEGBIT);
+#endif
+}
+
+void JitCompiler::Compile_PopCallerSavedXMM() {
+#ifndef _WIN32
+ MOVUPS(NEGBIT, MDisp(RSP, 0));
+ MOVUPS(ONE, MDisp(RSP, 16));
+ ADD(64, R(RSP), Imm8(2 * 16));
+#endif
+}
+
void JitCompiler::Compile_ADD(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
@@ -331,6 +347,71 @@ void JitCompiler::Compile_DP4(Instruction instr) {
Compile_DestEnable(instr, SRC1);
}
+void JitCompiler::Compile_DPH(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
+ if (Common::GetCPUCaps().sse4_1) {
+ // Set 4th component to 1.0
+ BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
+ DPPS(SRC1, R(SRC2), 0xff);
+ } else {
+ // Reverse to set the 4th component to 1.0
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
+ MOVSS(SRC1, R(ONE));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
+
+ MULPS(SRC1, R(SRC2));
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
+ ADDPS(SRC1, R(SRC2));
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
+ ADDPS(SRC1, R(SRC2));
+ }
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_EX2(Instruction instr) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ MOVSS(XMM0, R(SRC1));
+
+ // The following will actually break the stack alignment
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM();
+ ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+ MOVAPS(SRC1, R(XMM0));
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_LG2(Instruction instr) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ MOVSS(XMM0, R(SRC1));
+
+ // The following will actually break the stack alignment
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM();
+ ABI_CallFunction(reinterpret_cast<const void*>(log2f));
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+ MOVAPS(SRC1, R(XMM0));
+ Compile_DestEnable(instr, SRC1);
+}
+
void JitCompiler::Compile_MUL(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
@@ -338,6 +419,36 @@ void JitCompiler::Compile_MUL(Instruction instr) {
Compile_DestEnable(instr, SRC1);
}
+void JitCompiler::Compile_SGE(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
+ CMPPS(SRC1, R(SRC2), CMP_NLT);
+ ANDPS(SRC1, R(ONE));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_SLT(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
+ CMPPS(SRC1, R(SRC2), CMP_LT);
+ ANDPS(SRC1, R(ONE));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
void JitCompiler::Compile_FLR(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
@@ -415,22 +526,13 @@ void JitCompiler::Compile_MOV(Instruction instr) {
Compile_DestEnable(instr, SRC1);
}
-void JitCompiler::Compile_SLTI(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
- Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2);
-
- CMPSS(SRC1, R(SRC2), CMP_LT);
- ANDPS(SRC1, R(ONE));
-
- Compile_DestEnable(instr, SRC1);
-}
-
void JitCompiler::Compile_RCP(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica
+ // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
// performs this operation more accurately. This should be checked on hardware.
- RCPPS(SRC1, R(SRC1));
+ RCPSS(SRC1, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
Compile_DestEnable(instr, SRC1);
}
@@ -438,9 +540,10 @@ void JitCompiler::Compile_RCP(Instruction instr) {
void JitCompiler::Compile_RSQ(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica
+ // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
// performs this operation more accurately. This should be checked on hardware.
- RSQRTPS(SRC1, R(SRC1));
+ RSQRTSS(SRC1, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
Compile_DestEnable(instr, SRC1);
}
@@ -646,12 +749,12 @@ CompiledShader* JitCompiler::Compile() {
// Used to set a register to one
static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
MOV(PTRBITS, R(RAX), ImmPtr(&one));
- MOVAPS(ONE, MDisp(RAX, 0));
+ MOVAPS(ONE, MatR(RAX));
// Used to negate registers
static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
- MOVAPS(NEGBIT, MDisp(RAX, 0));
+ MOVAPS(NEGBIT, MatR(RAX));
looping = false;