diff options
author | Evan Cheng <evan.cheng@apple.com> | 2006-02-22 02:26:30 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2006-02-22 02:26:30 +0000 |
commit | 470a6adc784ad2e4194272b24f5ec50ee32aeb49 (patch) | |
tree | 4ba2cd0379735b13a27bcdf89120e009a3092887 /lib/Target/X86 | |
parent | e7988aae9457aecde4ae1fe3de211d00717c1b03 (diff) | |
download | external_llvm-470a6adc784ad2e4194272b24f5ec50ee32aeb49.zip external_llvm-470a6adc784ad2e4194272b24f5ec50ee32aeb49.tar.gz external_llvm-470a6adc784ad2e4194272b24f5ec50ee32aeb49.tar.bz2 |
Added MMX, SSE1, and SSE2 vector instructions and some simple patterns.
Fixed some existing bugs (wrong predicates, prefixes) at the same time.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26310 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rwxr-xr-x | lib/Target/X86/X86ATTAsmPrinter.h | 3 |
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 38 |
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 7 |
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 777 |
-rwxr-xr-x | lib/Target/X86/X86IntelAsmPrinter.h | 4 |
5 files changed, 544 insertions, 285 deletions
diff --git a/lib/Target/X86/X86ATTAsmPrinter.h b/lib/Target/X86/X86ATTAsmPrinter.h index 14d1e63..2fc1aa5 100755 --- a/lib/Target/X86/X86ATTAsmPrinter.h +++ b/lib/Target/X86/X86ATTAsmPrinter.h @@ -49,6 +49,9 @@ struct X86ATTAsmPrinter : public X86SharedAsmPrinter { void printi64mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printi128mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e32cc0f..77fc52b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -236,6 +236,44 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) addLegalFPImmediate(-0.0); // FLD0/FCHS addLegalFPImmediate(-1.0); // FLD1/FCHS } + + if (TM.getSubtarget<X86Subtarget>().hasMMX()) { + addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); + addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); + addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); + + // FIXME: We don't support any ConstantVec's yet. We should custom expand + // the ones we do! + setOperationAction(ISD::ConstantVec, MVT::v8i8, Expand); + setOperationAction(ISD::ConstantVec, MVT::v4i16, Expand); + setOperationAction(ISD::ConstantVec, MVT::v2i32, Expand); + } + + if (TM.getSubtarget<X86Subtarget>().hasSSE1()) { + addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); + + // FIXME: We don't support any ConstantVec's yet. We should custom expand + // the ones we do! 
+ setOperationAction(ISD::ConstantVec, MVT::v4f32, Expand); + } + + if (TM.getSubtarget<X86Subtarget>().hasSSE2()) { + addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); + addRegisterClass(MVT::v16i8, X86::VR128RegisterClass); + addRegisterClass(MVT::v8i16, X86::VR128RegisterClass); + addRegisterClass(MVT::v4i32, X86::VR128RegisterClass); + addRegisterClass(MVT::v2i64, X86::VR128RegisterClass); + + + // FIXME: We don't support any ConstantVec's yet. We should custom expand + // the ones we do! + setOperationAction(ISD::ConstantVec, MVT::v2f64, Expand); + setOperationAction(ISD::ConstantVec, MVT::v16i8, Expand); + setOperationAction(ISD::ConstantVec, MVT::v8i16, Expand); + setOperationAction(ISD::ConstantVec, MVT::v4i32, Expand); + setOperationAction(ISD::ConstantVec, MVT::v2i64, Expand); + } + computeRegisterProperties(); // FIXME: These should be based on subtarget info. Plus, the values should diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c8f5552..89f53f3 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -136,6 +136,7 @@ def i8mem : X86MemOperand<"printi8mem">; def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; +def i128mem : X86MemOperand<"printi128mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f128mem : X86MemOperand<"printf128mem">; @@ -341,6 +342,9 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>; def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>; +def X86loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; +def X86loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; + def sextloadi16i1 : PatFrag<(ops node:$ptr), (i16 (sextload node:$ptr, i1))>; def sextloadi32i1 : PatFrag<(ops node:$ptr), (i32 (sextload node:$ptr, i1))>; def sextloadi16i8 : 
PatFrag<(ops node:$ptr), (i16 (sextload node:$ptr, i8))>; @@ -360,9 +364,6 @@ def extloadf64f32 : PatFrag<(ops node:$ptr), (f64 (extload node:$ptr, f32))>; def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>; def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>; -def X86loadpv4f32 : PatFrag<(ops node:$ptr), (v4f32 (X86loadp node:$ptr))>; -def X86loadpv2f64 : PatFrag<(ops node:$ptr), (v2f64 (X86loadp node:$ptr))>; - //===----------------------------------------------------------------------===// // Instruction templates... diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 94e78b1..78fd8b9 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -17,6 +17,20 @@ // SSE scalar FP Instructions //===----------------------------------------------------------------------===// +// Instruction templates +// SSI - SSE1 instructions with XS prefix. +// SDI - SSE2 instructions with XD prefix. +// PSI - SSE1 instructions with TB prefix. +// PDI - SSE2 instructions with TB and OpSize prefixes. +class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>; +class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>; +class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>; +class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern> + : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>; + // Some 'special' instructions def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst), "#IMPLICIT_DEF $dst", @@ -39,206 +53,189 @@ let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. 
} // Move Instructions -def MOVSSrr : I<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src), - "movss {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, XS; -def MOVSDrr : I<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src), - "movsd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE2]>, XD; - -def MOVSSrm : I<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src), - "movss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (loadf32 addr:$src))]>, - Requires<[HasSSE1]>, XS; -def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src), +def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src), + "movss {$src, $dst|$dst, $src}", []>; +def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src), "movss {$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)]>, - Requires<[HasSSE1]>, XS; -def MOVSDrm : I<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src), + [(set FR32:$dst, (loadf32 addr:$src))]>; +def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src), + "movsd {$src, $dst|$dst, $src}", []>; +def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src), "movsd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (loadf64 addr:$src))]>, - Requires<[HasSSE2]>, XD; -def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src), + [(set FR64:$dst, (loadf64 addr:$src))]>; + +def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src), + "movss {$src, $dst|$dst, $src}", + [(store FR32:$src, addr:$dst)]>; +def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src), "movsd {$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>, - Requires<[HasSSE2]>, XD; + [(store FR64:$src, addr:$dst)]>; // Conversion instructions -def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src), +def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src), "cvttss2si {$src, $dst|$dst, $src}", - [(set R32:$dst, (fp_to_sint FR32:$src))]>, - Requires<[HasSSE1]>, XS; -def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src), + [(set R32:$dst, (fp_to_sint FR32:$src))]>; +def 
CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src), "cvttss2si {$src, $dst|$dst, $src}", - [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>, - Requires<[HasSSE1]>, XS; -def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src), + [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>; +def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src), "cvttsd2si {$src, $dst|$dst, $src}", - [(set R32:$dst, (fp_to_sint FR64:$src))]>, - Requires<[HasSSE2]>, XD; -def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src), + [(set R32:$dst, (fp_to_sint FR64:$src))]>; +def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src), "cvttsd2si {$src, $dst|$dst, $src}", - [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>, - Requires<[HasSSE2]>, XD; -def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src), - "cvtss2sd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (fextend FR32:$src))]>, - Requires<[HasSSE2]>, XS; -def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src), - "cvtss2sd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, - Requires<[HasSSE2]>, XS; -def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src), + [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>; +def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src), "cvtsd2ss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (fround FR64:$src))]>, - Requires<[HasSSE2]>, XD; -def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src), + [(set FR32:$dst, (fround FR64:$src))]>; +def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src), "cvtsd2ss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, - Requires<[HasSSE2]>, XD; -def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src), + [(set FR32:$dst, (fround (loadf64 addr:$src)))]>; +def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src), "cvtsi2ss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp R32:$src))]>, - Requires<[HasSSE2]>, XS; 
-def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src), + [(set FR32:$dst, (sint_to_fp R32:$src))]>; +def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src), "cvtsi2ss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>, - Requires<[HasSSE2]>, XS; -def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src), + [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>; +def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src), "cvtsi2sd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp R32:$src))]>, - Requires<[HasSSE2]>, XD; -def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src), + [(set FR64:$dst, (sint_to_fp R32:$src))]>; +def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src), "cvtsi2sd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>, - Requires<[HasSSE2]>, XD; + [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>; +// SSE2 instructions with XS prefix +def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src), + "cvtss2sd {$src, $dst|$dst, $src}", + [(set FR64:$dst, (fextend FR32:$src))]>, XS, + Requires<[HasSSE2]>; +def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src), + "cvtss2sd {$src, $dst|$dst, $src}", + [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS, + Requires<[HasSSE2]>; // Arithmetic instructions let isTwoAddress = 1 in { let isCommutable = 1 in { -def ADDSSrr : I<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), +def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "addss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, XS; -def ADDSDrr : I<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>; +def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), "addsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, 
XD; -def MULSSrr : I<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>; +def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "mulss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, XS; -def MULSDrr : I<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>; +def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), "mulsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, XD; + [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>; } -def ADDSSrm : I<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), +def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), "addss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>, - Requires<[HasSSE1]>, XS; -def ADDSDrm : I<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), + [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>; +def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), "addsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>, - Requires<[HasSSE2]>, XD; -def MULSSrm : I<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), + [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>; +def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), "mulss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>, - Requires<[HasSSE1]>, XS; -def MULSDrm : I<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), + [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>; +def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), "mulsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>, - 
Requires<[HasSSE2]>, XD; + [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>; -def DIVSSrr : I<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), +def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "divss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, XS; -def DIVSSrm : I<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), + [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>; +def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), "divss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>, - Requires<[HasSSE1]>, XS; -def DIVSDrr : I<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>; +def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), "divsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, XD; -def DIVSDrm : I<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), + [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>; +def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), "divsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>, - Requires<[HasSSE2]>, XD; + [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>; -def SUBSSrr : I<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), +def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "subss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, XS; -def SUBSSrm : I<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), + [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>; +def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2), "subss {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>, - 
Requires<[HasSSE1]>, XS; -def SUBSDrr : I<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>; +def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), "subsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, XD; -def SUBSDrm : I<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), + [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>; +def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2), "subsd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>, - Requires<[HasSSE2]>, XD; + [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>; } -def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src), +def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src), "sqrtss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (fsqrt FR32:$src))]>, - Requires<[HasSSE1]>, XS; -def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src), + [(set FR32:$dst, (fsqrt FR32:$src))]>; +def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src), "sqrtss {$src, $dst|$dst, $src}", - [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>, - Requires<[HasSSE1]>, XS; -def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src), + [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>; +def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src), "sqrtsd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (fsqrt FR64:$src))]>, - Requires<[HasSSE2]>, XD; -def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src), + [(set FR64:$dst, (fsqrt FR64:$src))]>; +def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src), "sqrtsd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>, - Requires<[HasSSE2]>, XD; + [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>; + +def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src), + "rsqrtss {$src, $dst|$dst, $src}", []>; 
+def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src), + "rsqrtss {$src, $dst|$dst, $src}", []>; +def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src), + "rcpss {$src, $dst|$dst, $src}", []>; +def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src), + "rcpss {$src, $dst|$dst, $src}", []>; + +def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src), + "maxss {$src, $dst|$dst, $src}", []>; +def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src), + "maxss {$src, $dst|$dst, $src}", []>; +def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src), + "maxsd {$src, $dst|$dst, $src}", []>; +def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src), + "maxsd {$src, $dst|$dst, $src}", []>; +def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src), + "minss {$src, $dst|$dst, $src}", []>; +def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src), + "minss {$src, $dst|$dst, $src}", []>; +def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src), + "minsd {$src, $dst|$dst, $src}", []>; +def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src), + "minsd {$src, $dst|$dst, $src}", []>; // Comparison instructions let isTwoAddress = 1 in { -def CMPSSrr : I<0xC2, MRMSrcReg, +def CMPSSrr : SSI<0xC2, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc), - "cmp${cc}ss {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, XS; -def CMPSSrm : I<0xC2, MRMSrcMem, + "cmp${cc}ss {$src, $dst|$dst, $src}", []>; +def CMPSSrm : SSI<0xC2, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, XS; -def CMPSDrr : I<0xC2, MRMSrcReg, + "cmp${cc}ss {$src, $dst|$dst, $src}", []>; +def CMPSDrr : SDI<0xC2, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc), - "cmp${cc}sd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, XD; -def CMPSDrm : I<0xC2, MRMSrcMem, + "cmp${cc}sd {$src, $dst|$dst, $src}", []>; +def CMPSDrm : SDI<0xC2, 
MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE2]>, XD; + "cmp${cc}sd {$src, $dst|$dst, $src}", []>; } -def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2), +def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2), "ucomiss {$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, FR32:$src2)]>, - Requires<[HasSSE1]>, TB; -def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2), + [(X86cmp FR32:$src1, FR32:$src2)]>; +def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2), "ucomiss {$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>, - Requires<[HasSSE1]>, TB; -def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2), + [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>; +def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2), "ucomisd {$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, FR64:$src2)]>, - Requires<[HasSSE2]>, TB, OpSize; -def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2), + [(X86cmp FR64:$src1, FR64:$src2)]>; +def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2), "ucomisd {$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; + [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>; // Aliases of packed instructions for scalar use. These all have names that // start with 'Fs'. @@ -254,89 +251,69 @@ def FsFLD0SD : I<0xEF, MRMInitReg, (ops FR64:$dst), // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd. // Upper bits are disregarded. 
-def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src), - "movapd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE2]>, TB, OpSize; +def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src), + "movaps {$src, $dst|$dst, $src}", []>; +def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src), + "movapd {$src, $dst|$dst, $src}", []>; // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd. // Upper bits are disregarded. -def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src), +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src), "movaps {$src, $dst|$dst, $src}", - [(set FR32:$dst, (X86loadpf32 addr:$src))]>, - Requires<[HasSSE1]>, TB; -def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src), + [(set FR32:$dst, (X86loadpf32 addr:$src))]>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src), "movapd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (X86loadpf64 addr:$src))]>, - Requires<[HasSSE2]>, TB, OpSize; + [(set FR64:$dst, (X86loadpf64 addr:$src))]>; // Alias bitwise logical operations using SSE logical ops on packed FP values. 
let isTwoAddress = 1 in { let isCommutable = 1 in { -def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), +def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "andps {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, TB; -def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>; +def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), "andpd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>; +def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + "orps {$src2, $dst|$dst, $src2}", []>; +def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>; +def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), "xorps {$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>, - Requires<[HasSSE1]>, TB; -def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>; +def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), "xorpd {$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; + [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>; } -def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, 
f128mem:$src2), +def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), "andps {$src2, $dst|$dst, $src2}", [(set FR32:$dst, (X86fand FR32:$src1, - (X86loadpf32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + (X86loadpf32 addr:$src2)))]>; +def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), "andpd {$src2, $dst|$dst, $src2}", [(set FR64:$dst, (X86fand FR64:$src1, - (X86loadpf64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; -def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + (X86loadpf64 addr:$src2)))]>; +def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + "orps {$src2, $dst|$dst, $src2}", []>; +def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>; +def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), "xorps {$src2, $dst|$dst, $src2}", [(set FR32:$dst, (X86fxor FR32:$src1, - (X86loadpf32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + (X86loadpf32 addr:$src2)))]>; +def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), "xorpd {$src2, $dst|$dst, $src2}", [(set FR64:$dst, (X86fxor FR64:$src1, - (X86loadpf64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; - -def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, 
f128mem:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; + (X86loadpf64 addr:$src2)))]>; + +def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>; +def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>; +def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>; +def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>; } //===----------------------------------------------------------------------===// @@ -344,114 +321,350 @@ def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2), //===----------------------------------------------------------------------===// // Move Instructions -def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), - "movapd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE2]>, TB, OpSize; +def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "movaps {$src, $dst|$dst, $src}", []>; +def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "movaps {$src, $dst|$dst, $src}", + [(set V4F32:$dst, (X86loadv4f32 addr:$src))]>; +def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "movapd {$src, $dst|$dst, $src}", []>; +def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), + "movapd {$src, $dst|$dst, $src}", + [(set V2F64:$dst, 
(X86loadv2f64 addr:$src))]>; -def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), - "movaps {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB; -def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src), - "movaps {$src, $dst|$dst, $src}",[]>, - Requires<[HasSSE1]>, TB; -def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), - "movapd {$src, $dst|$dst, $src}", []>, - Requires<[HasSSE1]>, TB, OpSize; -def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src), - "movapd {$src, $dst|$dst, $src}",[]>, - Requires<[HasSSE2]>, TB, OpSize; +def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src), + "movaps {$src, $dst|$dst, $src}", + [(store V4F32:$src, addr:$dst)]>; +def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src), + "movapd {$src, $dst|$dst, $src}", + [(store V2F64:$src, addr:$dst)]>; + +def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "movups {$src, $dst|$dst, $src}", []>; +def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "movups {$src, $dst|$dst, $src}", []>; +def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, V4F32:$src), + "movups {$src, $dst|$dst, $src}", []>; +def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "movupd {$src, $dst|$dst, $src}", []>; +def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), + "movupd {$src, $dst|$dst, $src}", []>; +def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, V2F64:$src), + "movupd {$src, $dst|$dst, $src}", []>; + +def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops V4F32:$dst, f64mem:$src), + "movlps {$src, $dst|$dst, $src}", []>; +def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, V4F32:$src), + "movlps {$src, $dst|$dst, $src}", []>; +def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops V2F64:$dst, f64mem:$src), + "movlpd {$src, $dst|$dst, $src}", []>; +def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, V2F64:$src), + "movlpd {$src, $dst|$dst, $src}", []>; + +def MOVHPSrm : PSI<0x16, 
MRMSrcMem, (ops V4F32:$dst, f64mem:$src), + "movhps {$src, $dst|$dst, $src}", []>; +def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, V4F32:$src), + "movhps {$src, $dst|$dst, $src}", []>; +def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops V2F64:$dst, f64mem:$src), + "movhpd {$src, $dst|$dst, $src}", []>; +def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, V2F64:$src), + "movhpd {$src, $dst|$dst, $src}", []>; + +def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "movlhps {$src, $dst|$dst, $src}", []>; +def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "movhlps {$src, $dst|$dst, $src}", []>; + +def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, V4F32:$src), + "movmskps {$src, $dst|$dst, $src}", []>; +def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, V2F64:$src), + "movmskpd {$src, $dst|$dst, $src}", []>; + +// Conversion instructions +def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops V4F32:$dst, V2I32:$src), + "cvtpi2ps {$src, $dst|$dst, $src}", []>; +def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops V4F32:$dst, i64mem:$src), + "cvtpi2ps {$src, $dst|$dst, $src}", []>; +def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops V2F64:$dst, V2I32:$src), + "cvtpi2pd {$src, $dst|$dst, $src}", []>; +def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops V2F64:$dst, i64mem:$src), + "cvtpi2pd {$src, $dst|$dst, $src}", []>; + +// SSE2 instructions without OpSize prefix +def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops V4F32:$dst, V4I32:$src), + "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasSSE2]>; +def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops V4F32:$dst, i128mem:$src), + "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasSSE2]>; + +// SSE2 instructions with XS prefix +def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops V2F64:$dst, V2I32:$src), + "cvtdq2pd {$src, $dst|$dst, $src}", []>, + XS, Requires<[HasSSE2]>; +def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops V2F64:$dst, i64mem:$src), + "cvtdq2pd {$src, $dst|$dst, $src}", []>, + XS, Requires<[HasSSE2]>; + +def CVTPS2PIrr : 
PSI<0x2D, MRMSrcReg, (ops V2I32:$dst, V4F32:$src), + "cvtps2pi {$src, $dst|$dst, $src}", []>; +def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops V2I32:$dst, f64mem:$src), + "cvtps2pi {$src, $dst|$dst, $src}", []>; +def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops V2I32:$dst, V2F64:$src), + "cvtpd2pi {$src, $dst|$dst, $src}", []>; +def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops V2I32:$dst, f128mem:$src), + "cvtpd2pi {$src, $dst|$dst, $src}", []>; + +def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops V4I32:$dst, V4F32:$src), + "cvtps2dq {$src, $dst|$dst, $src}", []>; +def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops V4I32:$dst, f128mem:$src), + "cvtps2dq {$src, $dst|$dst, $src}", []>; +// SSE2 packed instructions with XD prefix +def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops V4I32:$dst, V2F64:$src), + "cvtpd2dq {$src, $dst|$dst, $src}", []>; +def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops V4I32:$dst, f128mem:$src), + "cvtpd2dq {$src, $dst|$dst, $src}", []>; + +// SSE2 instructions without OpSize prefix +def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops V2F64:$dst, V4F32:$src), + "cvtps2pd {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasSSE2]>; +def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops V2F64:$dst, f64mem:$src), + "cvtps2pd {$src, $dst|$dst, $src}", []>, TB, + Requires<[HasSSE2]>; + +def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops V4F32:$dst, V2F64:$src), + "cvtpd2ps {$src, $dst|$dst, $src}", []>; +def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "cvtpd2ps {$src, $dst|$dst, $src}", []>; + +// Arithmetic +let isTwoAddress = 1 in { +let isCommutable = 1 in { +def ADDPSrr : PSI<0x58, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "addps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (fadd V4F32:$src1, V4F32:$src2))]>; +def ADDPDrr : PDI<0x58, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "addpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fadd V2F64:$src1, V2F64:$src2))]>; +def MULPSrr : PSI<0x59, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "mulps {$src2, 
$dst|$dst, $src2}", + [(set V4F32:$dst, (fmul V4F32:$src1, V4F32:$src2))]>; +def MULPDrr : PDI<0x59, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "mulpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fmul V2F64:$src1, V2F64:$src2))]>; +} + +def ADDPSrm : PSI<0x58, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "addps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (fadd V4F32:$src1, + (X86loadv4f32 addr:$src2)))]>; +def ADDPDrm : PDI<0x58, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "addpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fadd V2F64:$src1, + (X86loadv2f64 addr:$src2)))]>; +def MULPSrm : PSI<0x59, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "mulps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (fmul V4F32:$src1, + (X86loadv4f32 addr:$src2)))]>; +def MULPDrm : PDI<0x59, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "mulpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fmul V2F64:$src1, + (X86loadv2f64 addr:$src2)))]>; + +def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "divps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (fdiv V4F32:$src1, V4F32:$src2))]>; +def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "divps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (fdiv V4F32:$src1, + (X86loadv4f32 addr:$src2)))]>; +def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "divpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fdiv V2F64:$src1, V2F64:$src2))]>; +def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "divpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fdiv V2F64:$src1, + (X86loadv2f64 addr:$src2)))]>; + +def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "subps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (fsub V4F32:$src1, V4F32:$src2))]>; +def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, 
f128mem:$src2), + "subps {$src2, $dst|$dst, $src2}", + [(set V4F32:$dst, (fsub V4F32:$src1, + (X86loadv4f32 addr:$src2)))]>; +def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "subpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fsub V2F64:$src1, V2F64:$src2))]>; +def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "subpd {$src2, $dst|$dst, $src2}", + [(set V2F64:$dst, (fsub V2F64:$src1, + (X86loadv2f64 addr:$src2)))]>; +} + +def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "sqrtps {$src, $dst|$dst, $src}", + [(set V4F32:$dst, (fsqrt V4F32:$src))]>; +def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "sqrtps {$src, $dst|$dst, $src}", + [(set V4F32:$dst, (fsqrt (X86loadv4f32 addr:$src)))]>; +def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "sqrtpd {$src, $dst|$dst, $src}", + [(set V2F64:$dst, (fsqrt V2F64:$src))]>; +def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), + "sqrtpd {$src, $dst|$dst, $src}", + [(set V2F64:$dst, (fsqrt (X86loadv2f64 addr:$src)))]>; + +def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "rsqrtps {$src, $dst|$dst, $src}", []>; +def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "rsqrtps {$src, $dst|$dst, $src}", []>; +def RCPPSrr : PSI<0x53, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "rcpps {$src, $dst|$dst, $src}", []>; +def RCPPSrm : PSI<0x53, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "rcpps {$src, $dst|$dst, $src}", []>; + +def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "maxps {$src, $dst|$dst, $src}", []>; +def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "maxps {$src, $dst|$dst, $src}", []>; +def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "maxpd {$src, $dst|$dst, $src}", []>; +def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), + "maxpd {$src, $dst|$dst, $src}", []>; +def MINPSrr : 
PSI<0x5D, MRMSrcReg, (ops V4F32:$dst, V4F32:$src), + "minps {$src, $dst|$dst, $src}", []>; +def MINPSrm : PSI<0x5D, MRMSrcMem, (ops V4F32:$dst, f128mem:$src), + "minps {$src, $dst|$dst, $src}", []>; +def MINPDrr : PDI<0x5D, MRMSrcReg, (ops V2F64:$dst, V2F64:$src), + "minpd {$src, $dst|$dst, $src}", []>; +def MINPDrm : PDI<0x5D, MRMSrcMem, (ops V2F64:$dst, f128mem:$src), + "minpd {$src, $dst|$dst, $src}", []>; // Logical let isTwoAddress = 1 in { let isCommutable = 1 in { -def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), +def ANDPSrr : PSI<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), "andps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>, - Requires<[HasSSE1]>, TB; -def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>; +def ANDPDrr : PDI<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), "andpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; -def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>; +def ORPSrr : PSI<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "orps {$src2, $dst|$dst, $src2}", []>; +def ORPDrr : PDI<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>; +def XORPSrr : PSI<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), "xorps {$src2, $dst|$dst, $src2}", - [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>, - Requires<[HasSSE1]>, TB; -def XORPDrr : I<0x57, 
MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>; +def XORPDrr : PDI<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), "xorpd {$src2, $dst|$dst, $src2}", - [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>, - Requires<[HasSSE2]>, TB, OpSize; + [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>; } -def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), +def ANDPSrm : PSI<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), "andps {$src2, $dst|$dst, $src2}", [(set V4F32:$dst, (X86fand V4F32:$src1, - (X86loadpv4f32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + (X86loadv4f32 addr:$src2)))]>; +def ANDPDrm : PDI<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), "andpd {$src2, $dst|$dst, $src2}", [(set V2F64:$dst, (X86fand V2F64:$src1, - (X86loadpv2f64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; -def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "orps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "orpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + (X86loadv2f64 addr:$src2)))]>; +def ORPSrm : PSI<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "orps {$src2, $dst|$dst, $src2}", []>; +def ORPDrm : PDI<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "orpd {$src2, $dst|$dst, $src2}", []>; +def XORPSrm : PSI<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), "xorps {$src2, $dst|$dst, $src2}", [(set V4F32:$dst, (X86fxor V4F32:$src1, - (X86loadpv4f32 addr:$src2)))]>, - Requires<[HasSSE1]>, TB; -def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + (X86loadv4f32 
addr:$src2)))]>; +def XORPDrm : PDI<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), "xorpd {$src2, $dst|$dst, $src2}", [(set V2F64:$dst, (X86fxor V2F64:$src1, - (X86loadpv2f64 addr:$src2)))]>, - Requires<[HasSSE2]>, TB, OpSize; - -def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), - "andnps {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE1]>, TB; -def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; -def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), - "andnpd {$src2, $dst|$dst, $src2}", []>, - Requires<[HasSSE2]>, TB, OpSize; + (X86loadv2f64 addr:$src2)))]>; +def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>; +def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "andnps {$src2, $dst|$dst, $src2}", []>; +def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>; +def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "andnpd {$src2, $dst|$dst, $src2}", []>; +} + +let isTwoAddress = 1 in { +def CMPPSrr : PSI<0xC2, MRMSrcReg, + (ops V4F32:$dst, V4F32:$src1, V4F32:$src, SSECC:$cc), + "cmp${cc}ps {$src, $dst|$dst, $src}", []>; +def CMPPSrm : PSI<0xC2, MRMSrcMem, + (ops V4F32:$dst, V4F32:$src1, f128mem:$src, SSECC:$cc), + "cmp${cc}ps {$src, $dst|$dst, $src}", []>; +def CMPPDrr : PDI<0xC2, MRMSrcReg, + (ops V2F64:$dst, V2F64:$src1, V2F64:$src, SSECC:$cc), + "cmp${cc}pd {$src, $dst|$dst, $src}", []>; +def CMPPDrm : PDI<0xC2, MRMSrcMem, + (ops V2F64:$dst, V2F64:$src1, f128mem:$src, SSECC:$cc), + "cmp${cc}pd {$src, $dst|$dst, $src}", []>; } +// Shuffle and 
unpack instructions +def SHUFPSrr : PSI<0xC6, MRMSrcReg, + (ops V4F32:$dst, V4F32:$src1, V4F32:$src2, i8imm:$src3), + "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>; +def SHUFPSrm : PSI<0xC6, MRMSrcMem, + (ops V4F32:$dst, V4F32:$src1, f128mem:$src2, i8imm:$src3), + "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>; +def SHUFPDrr : PDI<0xC6, MRMSrcReg, + (ops V2F64:$dst, V2F64:$src1, V2F64:$src2, i8imm:$src3), + "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>; +def SHUFPDrm : PDI<0xC6, MRMSrcMem, + (ops V2F64:$dst, V2F64:$src1, f128mem:$src2, i8imm:$src3), + "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>; + +def UNPCKHPSrr : PSI<0x15, MRMSrcReg, + (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "unpckhps {$src2, $dst|$dst, $src2}", []>; +def UNPCKHPSrm : PSI<0x15, MRMSrcMem, + (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "unpckhps {$src2, $dst|$dst, $src2}", []>; +def UNPCKHPDrr : PDI<0x15, MRMSrcReg, + (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "unpckhpd {$src2, $dst|$dst, $src2}", []>; +def UNPCKHPDrm : PDI<0x15, MRMSrcMem, + (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "unpckhpd {$src2, $dst|$dst, $src2}", []>; +def UNPCKLPSrr : PSI<0x14, MRMSrcReg, + (ops V4F32:$dst, V4F32:$src1, V4F32:$src2), + "unpcklps {$src2, $dst|$dst, $src2}", []>; +def UNPCKLPSrm : PSI<0x14, MRMSrcMem, + (ops V4F32:$dst, V4F32:$src1, f128mem:$src2), + "unpcklps {$src2, $dst|$dst, $src2}", []>; +def UNPCKLPDrr : PDI<0x14, MRMSrcReg, + (ops V2F64:$dst, V2F64:$src1, V2F64:$src2), + "unpcklpd {$src2, $dst|$dst, $src2}", []>; +def UNPCKLPDrm : PDI<0x14, MRMSrcMem, + (ops V2F64:$dst, V2F64:$src1, f128mem:$src2), + "unpcklpd {$src2, $dst|$dst, $src2}", []>; + //===----------------------------------------------------------------------===// // SSE integer instructions //===----------------------------------------------------------------------===// // Move Instructions -def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src), - "movd {$src, $dst|$dst, $src}", 
[]>, TB, OpSize, - Requires<[HasSSE2]>; -def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; -def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src), - "movd {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; +def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src), + "movd {$src, $dst|$dst, $src}", []>; +def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src), + "movd {$src, $dst|$dst, $src}", []>; +def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src), + "movd {$src, $dst|$dst, $src}", []>; +// SSE2 instructions with XS prefix def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src), "movq {$src, $dst|$dst, $src}", []>, XS, Requires<[HasSSE2]>; def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src), "movq {$src, $dst|$dst, $src}", []>, XS, Requires<[HasSSE2]>; -def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src), - "movq {$src, $dst|$dst, $src}", []>, TB, OpSize, - Requires<[HasSSE2]>; + +def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src), + "movq {$src, $dst|$dst, $src}", []>; diff --git a/lib/Target/X86/X86IntelAsmPrinter.h b/lib/Target/X86/X86IntelAsmPrinter.h index 734ff2c..02c654f 100755 --- a/lib/Target/X86/X86IntelAsmPrinter.h +++ b/lib/Target/X86/X86IntelAsmPrinter.h @@ -65,6 +65,10 @@ struct X86IntelAsmPrinter : public X86SharedAsmPrinter { O << "QWORD PTR "; printMemReference(MI, OpNo); } + void printi128mem(const MachineInstr *MI, unsigned OpNo) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { O << "DWORD PTR "; printMemReference(MI, OpNo); |