diff options
Diffstat (limited to 'src/common/x64/emitter.h')
-rw-r--r-- | src/common/x64/emitter.h | 849 |
1 files changed, 414 insertions, 435 deletions
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index e9c924126..86f4a1fff 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h @@ -328,8 +328,6 @@ enum SSECompare ORD, }; -typedef const u8* JumpTarget; - class XEmitter { friend struct OpArg; // for Write8 etc @@ -344,27 +342,27 @@ private: void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); void WriteMulDivType(int bits, OpArg src, int ext); void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); - void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); - void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); + void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); + void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext); void WriteMXCSR(OpArg arg, int ext); void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); - void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); - void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); + void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); + void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); + void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); protected: - inline void Write8(u8 value) {*code++ = value;} - inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} - inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} - inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} + void Write8(u8 value) {*code++ = value;} + void Write16(u16 value) {*(u16*)code = (value); code += 2;} + void Write32(u32 value) {*(u32*)code = (value); code += 4;} + void Write64(u64 value) {*(u64*)code = (value); code += 8;} public: XEmitter() { code = nullptr; flags_locked = false; } @@ -413,8 +411,8 @@ public: // Stack control void PUSH(X64Reg reg); void POP(X64Reg reg); - void PUSH(int bits, const OpArg ®); - void POP(int bits, const OpArg ®); + void PUSH(int bits, const OpArg& reg); + void POP(int bits, const OpArg& reg); void PUSHF(); void POPF(); @@ -424,21 +422,19 @@ public: void UD2(); FixupBranch J(bool force5bytes = false); - void JMP(const u8 * addr, bool force5Bytes = false); - void JMP(OpArg arg); - void JMPptr(const OpArg &arg); + void JMP(const u8* addr, bool force5Bytes = false); + void JMPptr(const OpArg& arg); void JMPself(); //infinite loop! #ifdef CALL #undef CALL #endif - void CALL(const void *fnptr); + void CALL(const void* fnptr); void CALLptr(OpArg arg); FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); - //void J_CC(CCFlags conditionCode, JumpTarget target); - void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false); + void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); - void SetJumpTarget(const FixupBranch &branch); + void SetJumpTarget(const FixupBranch& branch); void SETcc(CCFlags flag, OpArg dest); // Note: CMOV brings small if any benefit on current cpus. @@ -450,8 +446,8 @@ public: void SFENCE(); // Bit scan - void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit - void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit + void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit + void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit // Cache control enum PrefetchLevel @@ -462,67 +458,67 @@ public: PF_T2, //Levels 3+ (aliased to T0 on AMD) }; void PREFETCH(PrefetchLevel level, OpArg arg); - void MOVNTI(int bits, OpArg dest, X64Reg src); - void MOVNTDQ(OpArg arg, X64Reg regOp); - void MOVNTPS(OpArg arg, X64Reg regOp); - void MOVNTPD(OpArg arg, X64Reg regOp); + void MOVNTI(int bits, const OpArg& dest, X64Reg src); + void MOVNTDQ(const OpArg& arg, X64Reg regOp); + void MOVNTPS(const OpArg& arg, X64Reg regOp); + void MOVNTPD(const OpArg& arg, X64Reg regOp); // Multiplication / division - void MUL(int bits, OpArg src); //UNSIGNED - void IMUL(int bits, OpArg src); //SIGNED - void IMUL(int bits, X64Reg regOp, OpArg src); - void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); - void DIV(int bits, OpArg src); - void IDIV(int bits, OpArg src); + void MUL(int bits, const OpArg& src); //UNSIGNED + void IMUL(int bits, const OpArg& src); //SIGNED + void IMUL(int bits, X64Reg regOp, const OpArg& src); + void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); + void DIV(int bits, const OpArg& src); + void IDIV(int bits, const OpArg& src); // Shift - void ROL(int bits, OpArg dest, OpArg shift); - void ROR(int bits, OpArg dest, OpArg shift); - void RCL(int bits, OpArg dest, OpArg shift); - void RCR(int bits, OpArg dest, OpArg shift); - void SHL(int bits, OpArg dest, OpArg shift); - void SHR(int bits, OpArg dest, OpArg shift); - void SAR(int bits, OpArg dest, OpArg shift); + void ROL(int bits, const OpArg& dest, const OpArg& shift); + void ROR(int bits, const OpArg& dest, const OpArg& shift); + void RCL(int bits, const OpArg& dest, const OpArg& shift); + void RCR(int bits, const OpArg& dest, const OpArg& shift); + void SHL(int bits, const OpArg& dest, const OpArg& shift); + void SHR(int bits, const OpArg& dest, const OpArg& shift); + void SAR(int bits, const OpArg& dest, const OpArg& shift); // Bit Test - void BT(int bits, OpArg dest, OpArg index); - void BTS(int bits, OpArg dest, OpArg index); - void BTR(int bits, OpArg dest, OpArg index); - void BTC(int bits, OpArg dest, OpArg index); + void BT(int bits, const OpArg& dest, const OpArg& index); + void BTS(int bits, const OpArg& dest, const OpArg& index); + void BTR(int bits, const OpArg& dest, const OpArg& index); + void BTC(int bits, const OpArg& dest, const OpArg& index); // Double-Precision Shift - void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); - void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); + void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); + void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); // Extend EAX into EDX in various ways void CWD(int bits = 16); - inline void CDQ() {CWD(32);} - inline void CQO() {CWD(64);} + void CDQ() {CWD(32);} + void CQO() {CWD(64);} void CBW(int bits = 8); - inline void CWDE() {CBW(16);} - inline void CDQE() {CBW(32);} + void CWDE() {CBW(16);} + void CDQE() {CBW(32);} // Load effective address void LEA(int bits, X64Reg dest, OpArg src); // Integer arithmetic - void NEG (int bits, OpArg src); - void ADD (int bits, const OpArg &a1, const OpArg &a2); - void ADC (int bits, const OpArg &a1, const OpArg &a2); - void SUB (int bits, const OpArg &a1, const OpArg &a2); - void SBB (int bits, const OpArg &a1, const OpArg &a2); - void AND (int bits, const OpArg &a1, const OpArg &a2); - void CMP (int bits, const OpArg &a1, const OpArg &a2); + void NEG(int bits, const OpArg& src); + void ADD(int bits, const OpArg& a1, const OpArg& a2); + void ADC(int bits, const OpArg& a1, const OpArg& a2); + void SUB(int bits, const OpArg& a1, const OpArg& a2); + void SBB(int bits, const OpArg& a1, const OpArg& a2); + void AND(int bits, const OpArg& a1, const OpArg& a2); + void CMP(int bits, const OpArg& a1, const OpArg& a2); // Bit operations - void NOT (int bits, OpArg src); - void OR (int bits, const OpArg &a1, const OpArg &a2); - void XOR (int bits, const OpArg &a1, const OpArg &a2); - void MOV (int bits, const OpArg &a1, const OpArg &a2); - void TEST(int bits, const OpArg &a1, const OpArg &a2); + void NOT (int bits, const OpArg& src); + void OR(int bits, const OpArg& a1, const OpArg& a2); + void XOR(int bits, const OpArg& a1, const OpArg& a2); + void MOV(int bits, const OpArg& a1, const OpArg& a2); + void TEST(int bits, const OpArg& a1, const OpArg& a2); // Are these useful at all? Consider removing. - void XCHG(int bits, const OpArg &a1, const OpArg &a2); + void XCHG(int bits, const OpArg& a1, const OpArg& a2); void XCHG_AHAL(); // Byte swapping (32 and 64-bit only). @@ -536,13 +532,13 @@ public: void MOVBE(int dbits, const OpArg& dest, const OpArg& src); // Available only on AMD >= Phenom or Intel >= Haswell - void LZCNT(int bits, X64Reg dest, OpArg src); + void LZCNT(int bits, X64Reg dest, const OpArg& src); // Note: this one is actually part of BMI1 - void TZCNT(int bits, X64Reg dest, OpArg src); + void TZCNT(int bits, X64Reg dest, const OpArg& src); // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) - void STMXCSR(OpArg memloc); - void LDMXCSR(OpArg memloc); + void STMXCSR(const OpArg& memloc); + void LDMXCSR(const OpArg& memloc); // Prefixes void LOCK(); @@ -569,259 +565,242 @@ public: x87_FPUBusy = 0x8000, }; - void FLD(int bits, OpArg src); - void FST(int bits, OpArg dest); - void FSTP(int bits, OpArg dest); + void FLD(int bits, const OpArg& src); + void FST(int bits, const OpArg& dest); + void FSTP(int bits, const OpArg& dest); void FNSTSW_AX(); void FWAIT(); // SSE/SSE2: Floating point arithmetic - void ADDSS(X64Reg regOp, OpArg arg); - void ADDSD(X64Reg regOp, OpArg arg); - void SUBSS(X64Reg regOp, OpArg arg); - void SUBSD(X64Reg regOp, OpArg arg); - void MULSS(X64Reg regOp, OpArg arg); - void MULSD(X64Reg regOp, OpArg arg); - void DIVSS(X64Reg regOp, OpArg arg); - void DIVSD(X64Reg regOp, OpArg arg); - void MINSS(X64Reg regOp, OpArg arg); - void MINSD(X64Reg regOp, OpArg arg); - void MAXSS(X64Reg regOp, OpArg arg); - void MAXSD(X64Reg regOp, OpArg arg); - void SQRTSS(X64Reg regOp, OpArg arg); - void SQRTSD(X64Reg regOp, OpArg arg); - void RSQRTSS(X64Reg regOp, OpArg arg); + void ADDSS(X64Reg regOp, const OpArg& arg); + void ADDSD(X64Reg regOp, const OpArg& arg); + void SUBSS(X64Reg regOp, const OpArg& arg); + void SUBSD(X64Reg regOp, const OpArg& arg); + void MULSS(X64Reg regOp, const OpArg& arg); + void MULSD(X64Reg regOp, const OpArg& arg); + void DIVSS(X64Reg regOp, const OpArg& arg); + void DIVSD(X64Reg regOp, const OpArg& arg); + void MINSS(X64Reg regOp, const OpArg& arg); + void MINSD(X64Reg regOp, const OpArg& arg); + void MAXSS(X64Reg regOp, const OpArg& arg); + void MAXSD(X64Reg regOp, const OpArg& arg); + void SQRTSS(X64Reg regOp, const OpArg& arg); + void SQRTSD(X64Reg regOp, const OpArg& arg); + void RSQRTSS(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Floating point bitwise (yes) - void CMPSS(X64Reg regOp, OpArg arg, u8 compare); - void CMPSD(X64Reg regOp, OpArg arg, u8 compare); + void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); + void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); - inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } - inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } - inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } - inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } - inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } - inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } - inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } + void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); } + void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); } + void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); } + void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); } + void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); } + void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); } + void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); } // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) - void ADDPS(X64Reg regOp, OpArg arg); - void ADDPD(X64Reg regOp, OpArg arg); - void SUBPS(X64Reg regOp, OpArg arg); - void SUBPD(X64Reg regOp, OpArg arg); - void CMPPS(X64Reg regOp, OpArg arg, u8 compare); - void CMPPD(X64Reg regOp, OpArg arg, u8 compare); - void MULPS(X64Reg regOp, OpArg arg); - void MULPD(X64Reg regOp, OpArg arg); - void DIVPS(X64Reg regOp, OpArg arg); - void DIVPD(X64Reg regOp, OpArg arg); - void MINPS(X64Reg regOp, OpArg arg); - void MINPD(X64Reg regOp, OpArg arg); - void MAXPS(X64Reg regOp, OpArg arg); - void MAXPD(X64Reg regOp, OpArg arg); - void SQRTPS(X64Reg regOp, OpArg arg); - void SQRTPD(X64Reg regOp, OpArg arg); - void RCPPS(X64Reg regOp, OpArg arg); - void RSQRTPS(X64Reg regOp, OpArg arg); + void ADDPS(X64Reg regOp, const OpArg& arg); + void ADDPD(X64Reg regOp, const OpArg& arg); + void SUBPS(X64Reg regOp, const OpArg& arg); + void SUBPD(X64Reg regOp, const OpArg& arg); + void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare); + void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare); + void MULPS(X64Reg regOp, const OpArg& arg); + void MULPD(X64Reg regOp, const OpArg& arg); + void DIVPS(X64Reg regOp, const OpArg& arg); + void DIVPD(X64Reg regOp, const OpArg& arg); + void MINPS(X64Reg regOp, const OpArg& arg); + void MINPD(X64Reg regOp, const OpArg& arg); + void MAXPS(X64Reg regOp, const OpArg& arg); + void MAXPD(X64Reg regOp, const OpArg& arg); + void SQRTPS(X64Reg regOp, const OpArg& arg); + void SQRTPD(X64Reg regOp, const OpArg& arg); + void RCPPS(X64Reg regOp, const OpArg& arg); + void RSQRTPS(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) - void ANDPS(X64Reg regOp, OpArg arg); - void ANDPD(X64Reg regOp, OpArg arg); - void ANDNPS(X64Reg regOp, OpArg arg); - void ANDNPD(X64Reg regOp, OpArg arg); - void ORPS(X64Reg regOp, OpArg arg); - void ORPD(X64Reg regOp, OpArg arg); - void XORPS(X64Reg regOp, OpArg arg); - void XORPD(X64Reg regOp, OpArg arg); + void ANDPS(X64Reg regOp, const OpArg& arg); + void ANDPD(X64Reg regOp, const OpArg& arg); + void ANDNPS(X64Reg regOp, const OpArg& arg); + void ANDNPD(X64Reg regOp, const OpArg& arg); + void ORPS(X64Reg regOp, const OpArg& arg); + void ORPD(X64Reg regOp, const OpArg& arg); + void XORPS(X64Reg regOp, const OpArg& arg); + void XORPD(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. - void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); - void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); + void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); + void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); // SSE/SSE2: Useful alternative to shuffle in some cases. - void MOVDDUP(X64Reg regOp, OpArg arg); - - // TODO: Actually implement -#if 0 - // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products... - void ADDSUBPS(X64Reg dest, OpArg src); - void ADDSUBPD(X64Reg dest, OpArg src); - void HADDPD(X64Reg dest, OpArg src); - void HSUBPS(X64Reg dest, OpArg src); - void HSUBPD(X64Reg dest, OpArg src); - - // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". - void DPPD(X64Reg dest, OpArg src, u8 arg); - - // These are probably useful for VFPU emulation. - void INSERTPS(X64Reg dest, OpArg src, u8 arg); - void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); -#endif + void MOVDDUP(X64Reg regOp, const OpArg& arg); // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. - void HADDPS(X64Reg dest, OpArg src); + void HADDPS(X64Reg dest, const OpArg& src); // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". - void DPPS(X64Reg dest, OpArg src, u8 arg); + void DPPS(X64Reg dest, const OpArg& src, u8 arg); - void UNPCKLPS(X64Reg dest, OpArg src); - void UNPCKHPS(X64Reg dest, OpArg src); - void UNPCKLPD(X64Reg dest, OpArg src); - void UNPCKHPD(X64Reg dest, OpArg src); + void UNPCKLPS(X64Reg dest, const OpArg& src); + void UNPCKHPS(X64Reg dest, const OpArg& src); + void UNPCKLPD(X64Reg dest, const OpArg& src); + void UNPCKHPD(X64Reg dest, const OpArg& src); // SSE/SSE2: Compares. - void COMISS(X64Reg regOp, OpArg arg); - void COMISD(X64Reg regOp, OpArg arg); - void UCOMISS(X64Reg regOp, OpArg arg); - void UCOMISD(X64Reg regOp, OpArg arg); + void COMISS(X64Reg regOp, const OpArg& arg); + void COMISD(X64Reg regOp, const OpArg& arg); + void UCOMISS(X64Reg regOp, const OpArg& arg); + void UCOMISD(X64Reg regOp, const OpArg& arg); // SSE/SSE2: Moves. Use the right data type for your data, in most cases. - void MOVAPS(X64Reg regOp, OpArg arg); - void MOVAPD(X64Reg regOp, OpArg arg); - void MOVAPS(OpArg arg, X64Reg regOp); - void MOVAPD(OpArg arg, X64Reg regOp); - - void MOVUPS(X64Reg regOp, OpArg arg); - void MOVUPD(X64Reg regOp, OpArg arg); - void MOVUPS(OpArg arg, X64Reg regOp); - void MOVUPD(OpArg arg, X64Reg regOp); - - void MOVDQA(X64Reg regOp, OpArg arg); - void MOVDQA(OpArg arg, X64Reg regOp); - void MOVDQU(X64Reg regOp, OpArg arg); - void MOVDQU(OpArg arg, X64Reg regOp); - - void MOVSS(X64Reg regOp, OpArg arg); - void MOVSD(X64Reg regOp, OpArg arg); - void MOVSS(OpArg arg, X64Reg regOp); - void MOVSD(OpArg arg, X64Reg regOp); - - void MOVLPS(X64Reg regOp, OpArg arg); - void MOVLPD(X64Reg regOp, OpArg arg); - void MOVLPS(OpArg arg, X64Reg regOp); - void MOVLPD(OpArg arg, X64Reg regOp); - - void MOVHPS(X64Reg regOp, OpArg arg); - void MOVHPD(X64Reg regOp, OpArg arg); - void MOVHPS(OpArg arg, X64Reg regOp); - void MOVHPD(OpArg arg, X64Reg regOp); + void MOVAPS(X64Reg regOp, const OpArg& arg); + void MOVAPD(X64Reg regOp, const OpArg& arg); + void MOVAPS(const OpArg& arg, X64Reg regOp); + void MOVAPD(const OpArg& arg, X64Reg regOp); + + void MOVUPS(X64Reg regOp, const OpArg& arg); + void MOVUPD(X64Reg regOp, const OpArg& arg); + void MOVUPS(const OpArg& arg, X64Reg regOp); + void MOVUPD(const OpArg& arg, X64Reg regOp); + + void MOVDQA(X64Reg regOp, const OpArg& arg); + void MOVDQA(const OpArg& arg, X64Reg regOp); + void MOVDQU(X64Reg regOp, const OpArg& arg); + void MOVDQU(const OpArg& arg, X64Reg regOp); + + void MOVSS(X64Reg regOp, const OpArg& arg); + void MOVSD(X64Reg regOp, const OpArg& arg); + void MOVSS(const OpArg& arg, X64Reg regOp); + void MOVSD(const OpArg& arg, X64Reg regOp); + + void MOVLPS(X64Reg regOp, const OpArg& arg); + void MOVLPD(X64Reg regOp, const OpArg& arg); + void MOVLPS(const OpArg& arg, X64Reg regOp); + void MOVLPD(const OpArg& arg, X64Reg regOp); + + void MOVHPS(X64Reg regOp, const OpArg& arg); + void MOVHPD(X64Reg regOp, const OpArg& arg); + void MOVHPS(const OpArg& arg, X64Reg regOp); + void MOVHPD(const OpArg& arg, X64Reg regOp); void MOVHLPS(X64Reg regOp1, X64Reg regOp2); void MOVLHPS(X64Reg regOp1, X64Reg regOp2); - void MOVD_xmm(X64Reg dest, const OpArg &arg); + void MOVD_xmm(X64Reg dest, const OpArg& arg); void MOVQ_xmm(X64Reg dest, OpArg arg); - void MOVD_xmm(const OpArg &arg, X64Reg src); + void MOVD_xmm(const OpArg& arg, X64Reg src); void MOVQ_xmm(OpArg arg, X64Reg src); // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. - void MOVMSKPS(X64Reg dest, OpArg arg); - void MOVMSKPD(X64Reg dest, OpArg arg); + void MOVMSKPS(X64Reg dest, const OpArg& arg); + void MOVMSKPD(X64Reg dest, const OpArg& arg); // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. void MASKMOVDQU(X64Reg dest, X64Reg src); - void LDDQU(X64Reg dest, OpArg src); + void LDDQU(X64Reg dest, const OpArg& src); // SSE/SSE2: Data type conversions. - void CVTPS2PD(X64Reg dest, OpArg src); - void CVTPD2PS(X64Reg dest, OpArg src); - void CVTSS2SD(X64Reg dest, OpArg src); - void CVTSI2SS(X64Reg dest, OpArg src); - void CVTSD2SS(X64Reg dest, OpArg src); - void CVTSI2SD(X64Reg dest, OpArg src); - void CVTDQ2PD(X64Reg regOp, OpArg arg); - void CVTPD2DQ(X64Reg regOp, OpArg arg); - void CVTDQ2PS(X64Reg regOp, OpArg arg); - void CVTPS2DQ(X64Reg regOp, OpArg arg); - - void CVTTPS2DQ(X64Reg regOp, OpArg arg); - void CVTTPD2DQ(X64Reg regOp, OpArg arg); + void CVTPS2PD(X64Reg dest, const OpArg& src); + void CVTPD2PS(X64Reg dest, const OpArg& src); + void CVTSS2SD(X64Reg dest, const OpArg& src); + void CVTSI2SS(X64Reg dest, const OpArg& src); + void CVTSD2SS(X64Reg dest, const OpArg& src); + void CVTSI2SD(X64Reg dest, const OpArg& src); + void CVTDQ2PD(X64Reg regOp, const OpArg& arg); + void CVTPD2DQ(X64Reg regOp, const OpArg& arg); + void CVTDQ2PS(X64Reg regOp, const OpArg& arg); + void CVTPS2DQ(X64Reg regOp, const OpArg& arg); + + void CVTTPS2DQ(X64Reg regOp, const OpArg& arg); + void CVTTPD2DQ(X64Reg regOp, const OpArg& arg); // Destinations are X64 regs (rax, rbx, ...) for these instructions. - void CVTSS2SI(X64Reg xregdest, OpArg src); - void CVTSD2SI(X64Reg xregdest, OpArg src); - void CVTTSS2SI(X64Reg xregdest, OpArg arg); - void CVTTSD2SI(X64Reg xregdest, OpArg arg); + void CVTSS2SI(X64Reg xregdest, const OpArg& src); + void CVTSD2SI(X64Reg xregdest, const OpArg& src); + void CVTTSS2SI(X64Reg xregdest, const OpArg& arg); + void CVTTSD2SI(X64Reg xregdest, const OpArg& arg); // SSE2: Packed integer instructions - void PACKSSDW(X64Reg dest, OpArg arg); - void PACKSSWB(X64Reg dest, OpArg arg); - void PACKUSDW(X64Reg dest, OpArg arg); - void PACKUSWB(X64Reg dest, OpArg arg); + void PACKSSDW(X64Reg dest, const OpArg& arg); + void PACKSSWB(X64Reg dest, const OpArg& arg); + void PACKUSDW(X64Reg dest, const OpArg& arg); + void PACKUSWB(X64Reg dest, const OpArg& arg); void PUNPCKLBW(X64Reg dest, const OpArg &arg); void PUNPCKLWD(X64Reg dest, const OpArg &arg); void PUNPCKLDQ(X64Reg dest, const OpArg &arg); void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); - void PTEST(X64Reg dest, OpArg arg); - void PAND(X64Reg dest, OpArg arg); - void PANDN(X64Reg dest, OpArg arg); - void PXOR(X64Reg dest, OpArg arg); - void POR(X64Reg dest, OpArg arg); - - void PADDB(X64Reg dest, OpArg arg); - void PADDW(X64Reg dest, OpArg arg); - void PADDD(X64Reg dest, OpArg arg); - void PADDQ(X64Reg dest, OpArg arg); - - void PADDSB(X64Reg dest, OpArg arg); - void PADDSW(X64Reg dest, OpArg arg); - void PADDUSB(X64Reg dest, OpArg arg); - void PADDUSW(X64Reg dest, OpArg arg); - - void PSUBB(X64Reg dest, OpArg arg); - void PSUBW(X64Reg dest, OpArg arg); - void PSUBD(X64Reg dest, OpArg arg); - void PSUBQ(X64Reg dest, OpArg arg); - - void PSUBSB(X64Reg dest, OpArg arg); - void PSUBSW(X64Reg dest, OpArg arg); - void PSUBUSB(X64Reg dest, OpArg arg); - void PSUBUSW(X64Reg dest, OpArg arg); - - void PAVGB(X64Reg dest, OpArg arg); - void PAVGW(X64Reg dest, OpArg arg); - - void PCMPEQB(X64Reg dest, OpArg arg); - void PCMPEQW(X64Reg dest, OpArg arg); - void PCMPEQD(X64Reg dest, OpArg arg); - - void PCMPGTB(X64Reg dest, OpArg arg); - void PCMPGTW(X64Reg dest, OpArg arg); - void PCMPGTD(X64Reg dest, OpArg arg); - - void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); - void PINSRW(X64Reg dest, OpArg arg, u8 subreg); - - void PMADDWD(X64Reg dest, OpArg arg); - void PSADBW(X64Reg dest, OpArg arg); - - void PMAXSW(X64Reg dest, OpArg arg); - void PMAXUB(X64Reg dest, OpArg arg); - void PMINSW(X64Reg dest, OpArg arg); - void PMINUB(X64Reg dest, OpArg arg); + void PTEST(X64Reg dest, const OpArg& arg); + void PAND(X64Reg dest, const OpArg& arg); + void PANDN(X64Reg dest, const OpArg& arg); + void PXOR(X64Reg dest, const OpArg& arg); + void POR(X64Reg dest, const OpArg& arg); + + void PADDB(X64Reg dest, const OpArg& arg); + void PADDW(X64Reg dest, const OpArg& arg); + void PADDD(X64Reg dest, const OpArg& arg); + void PADDQ(X64Reg dest, const OpArg& arg); + + void PADDSB(X64Reg dest, const OpArg& arg); + void PADDSW(X64Reg dest, const OpArg& arg); + void PADDUSB(X64Reg dest, const OpArg& arg); + void PADDUSW(X64Reg dest, const OpArg& arg); + + void PSUBB(X64Reg dest, const OpArg& arg); + void PSUBW(X64Reg dest, const OpArg& arg); + void PSUBD(X64Reg dest, const OpArg& arg); + void PSUBQ(X64Reg dest, const OpArg& arg); + + void PSUBSB(X64Reg dest, const OpArg& arg); + void PSUBSW(X64Reg dest, const OpArg& arg); + void PSUBUSB(X64Reg dest, const OpArg& arg); + void PSUBUSW(X64Reg dest, const OpArg& arg); + + void PAVGB(X64Reg dest, const OpArg& arg); + void PAVGW(X64Reg dest, const OpArg& arg); + + void PCMPEQB(X64Reg dest, const OpArg& arg); + void PCMPEQW(X64Reg dest, const OpArg& arg); + void PCMPEQD(X64Reg dest, const OpArg& arg); + + void PCMPGTB(X64Reg dest, const OpArg& arg); + void PCMPGTW(X64Reg dest, const OpArg& arg); + void PCMPGTD(X64Reg dest, const OpArg& arg); + + void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg); + void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg); + + void PMADDWD(X64Reg dest, const OpArg& arg); + void PSADBW(X64Reg dest, const OpArg& arg); + + void PMAXSW(X64Reg dest, const OpArg& arg); + void PMAXUB(X64Reg dest, const OpArg& arg); + void PMINSW(X64Reg dest, const OpArg& arg); + void PMINUB(X64Reg dest, const OpArg& arg); // SSE4: More MAX/MIN instructions. - void PMINSB(X64Reg dest, OpArg arg); - void PMINSD(X64Reg dest, OpArg arg); - void PMINUW(X64Reg dest, OpArg arg); - void PMINUD(X64Reg dest, OpArg arg); - void PMAXSB(X64Reg dest, OpArg arg); - void PMAXSD(X64Reg dest, OpArg arg); - void PMAXUW(X64Reg dest, OpArg arg); - void PMAXUD(X64Reg dest, OpArg arg); - - void PMOVMSKB(X64Reg dest, OpArg arg); - void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); - void PSHUFB(X64Reg dest, OpArg arg); - - void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); - void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); + void PMINSB(X64Reg dest, const OpArg& arg); + void PMINSD(X64Reg dest, const OpArg& arg); + void PMINUW(X64Reg dest, const OpArg& arg); + void PMINUD(X64Reg dest, const OpArg& arg); + void PMAXSB(X64Reg dest, const OpArg& arg); + void PMAXSD(X64Reg dest, const OpArg& arg); + void PMAXUW(X64Reg dest, const OpArg& arg); + void PMAXUD(X64Reg dest, const OpArg& arg); + + void PMOVMSKB(X64Reg dest, const OpArg& arg); + void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle); + void PSHUFB(X64Reg dest, const OpArg& arg); + + void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle); + void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle); void PSRLW(X64Reg reg, int shift); void PSRLD(X64Reg reg, int shift); void PSRLQ(X64Reg reg, int shift); - void PSRLQ(X64Reg reg, OpArg arg); + void PSRLQ(X64Reg reg, const OpArg& arg); void PSRLDQ(X64Reg reg, int shift); void PSLLW(X64Reg reg, int shift); @@ -833,198 +812,198 @@ public: void PSRAD(X64Reg reg, int shift); // SSE4: data type conversions - void PMOVSXBW(X64Reg dest, OpArg arg); - void PMOVSXBD(X64Reg dest, OpArg arg); - void PMOVSXBQ(X64Reg dest, OpArg arg); - void PMOVSXWD(X64Reg dest, OpArg arg); - void PMOVSXWQ(X64Reg dest, OpArg arg); - void PMOVSXDQ(X64Reg dest, OpArg arg); - void PMOVZXBW(X64Reg dest, OpArg arg); - void PMOVZXBD(X64Reg dest, OpArg arg); - void PMOVZXBQ(X64Reg dest, OpArg arg); - void PMOVZXWD(X64Reg dest, OpArg arg); - void PMOVZXWQ(X64Reg dest, OpArg arg); - void PMOVZXDQ(X64Reg dest, OpArg arg); + void PMOVSXBW(X64Reg dest, const OpArg& arg); + void PMOVSXBD(X64Reg dest, const OpArg& arg); + void PMOVSXBQ(X64Reg dest, const OpArg& arg); + void PMOVSXWD(X64Reg dest, const OpArg& arg); + void PMOVSXWQ(X64Reg dest, const OpArg& arg); + void PMOVSXDQ(X64Reg dest, const OpArg& arg); + void PMOVZXBW(X64Reg dest, const OpArg& arg); + void PMOVZXBD(X64Reg dest, const OpArg& arg); + void PMOVZXBQ(X64Reg dest, const OpArg& arg); + void PMOVZXWD(X64Reg dest, const OpArg& arg); + void PMOVZXWQ(X64Reg dest, const OpArg& arg); + void PMOVZXDQ(X64Reg dest, const OpArg& arg); // SSE4: variable blend instructions (xmm0 implicit argument) - void PBLENDVB(X64Reg dest, OpArg arg); - void BLENDVPS(X64Reg dest, OpArg arg); - void BLENDVPD(X64Reg dest, OpArg arg); + void PBLENDVB(X64Reg dest, const OpArg& arg); + void BLENDVPS(X64Reg dest, const OpArg& arg); + void BLENDVPD(X64Reg dest, const OpArg& arg); void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) - void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); - void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); - void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); - void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); - - inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } - - inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } - inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } - inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } - inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } + void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode); + void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode); + void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); + void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); + + void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } + void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); } + void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); } + + void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } + void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); } + void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); } + + void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } + void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); } + void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); } + + void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } + void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } + void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); } + void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); } // AVX - void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); - void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - - void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle); + void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + + void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + + void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); // FMA3 - void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); - void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); + void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); // VEX GPR instructions - void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); - void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); - void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void BLSR(int bits, X64Reg regOp, OpArg arg); - void BLSMSK(int bits, X64Reg regOp, OpArg arg); - void BLSI(int bits, X64Reg regOp, OpArg arg); - void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); - void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate); + void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); + void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void BLSR(int bits, X64Reg regOp, const OpArg& arg); + void BLSMSK(int bits, X64Reg regOp, const OpArg& arg); + void BLSI(int bits, X64Reg regOp, const OpArg& arg); + void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); + void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); void RDTSC(); // Utility functions // The difference between this and CALL is that this aligns the stack // where appropriate. - void ABI_CallFunction(const void *func); + void ABI_CallFunction(const void* func); template <typename T> void ABI_CallFunction(T (*func)()) { - ABI_CallFunction((const void *)func); + ABI_CallFunction((const void*)func); } - void ABI_CallFunction(const u8 *func) { - ABI_CallFunction((const void *)func); + void ABI_CallFunction(const u8* func) { + ABI_CallFunction((const void*)func); } - void ABI_CallFunctionC16(const void *func, u16 param1); - void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); + void ABI_CallFunctionC16(const void* func, u16 param1); + void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); // These only support u32 parameters, but that's enough for a lot of uses. // These will destroy the 1 or 2 first "parameter regs". - void ABI_CallFunctionC(const void *func, u32 param1); - void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); - void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); - void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); - void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); - void ABI_CallFunctionP(const void *func, void *param1); - void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); - void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); - void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); - void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); - void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); - void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); - void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); + void ABI_CallFunctionC(const void* func, u32 param1); + void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2); + void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3); + void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3); + void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4); + void ABI_CallFunctionP(const void* func, void* param1); + void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2); + void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3); + void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3); + void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2); + void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3); + void ABI_CallFunctionA(const void* func, const OpArg& arg1); + void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2); // Pass a register as a parameter. - void ABI_CallFunctionR(const void *func, X64Reg reg1); - void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); + void ABI_CallFunctionR(const void* func, X64Reg reg1); + void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2); template <typename Tr, typename T1> void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { - ABI_CallFunctionC((const void *)func, param1); + ABI_CallFunctionC((const void*)func, param1); } // A function that doesn't have any control over what it will do to regs, @@ -1048,9 +1027,9 @@ public: void ABI_EmitEpilogue(int maxCallParams); #ifdef _M_IX86 - inline int ABI_GetNumXMMRegs() { return 8; } + static int ABI_GetNumXMMRegs() { return 8; } #else - inline int ABI_GetNumXMMRegs() { return 16; } + static int ABI_GetNumXMMRegs() { return 16; } #endif }; // class XEmitter |