diff options
Diffstat (limited to 'lib/Target/X86/X86InstrSSE.td')
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 31 |
1 files changed, 14 insertions, 17 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index bedc5e5..7db456e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -21,8 +21,6 @@ def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2> ]>; -def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>; -def X86loadu : SDNode<"X86ISD::LOAD_UA", SDTLoad, [SDNPHasChain]>; def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, @@ -82,9 +80,6 @@ def sdmem : Operand<v2f64> { // SSE pattern fragments //===----------------------------------------------------------------------===// -def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>; -def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>; - def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; @@ -109,6 +104,8 @@ def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{ return false; }]>; +def alignedloadf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; +def alignedloadf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>; def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>; @@ -411,7 +408,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // disregarded. def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps {$src, $dst|$dst, $src}", - [(set FR32:$dst, (X86loadpf32 addr:$src))]>; + [(set FR32:$dst, (alignedloadf32 addr:$src))]>; // Alias bitwise logical operations using SSE logical ops on packed FP values. let isTwoAddress = 1 in { @@ -430,15 +427,15 @@ let isCommutable = 1 in { def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2), "andps {$src2, $dst|$dst, $src2}", [(set FR32:$dst, (X86fand FR32:$src1, - (X86loadpf32 addr:$src2)))]>; + (alignedloadf32 addr:$src2)))]>; def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2), "orps {$src2, $dst|$dst, $src2}", [(set FR32:$dst, (X86for FR32:$src1, - (X86loadpf32 addr:$src2)))]>; + (alignedloadf32 addr:$src2)))]>; def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2), "xorps {$src2, $dst|$dst, $src2}", [(set FR32:$dst, (X86fxor FR32:$src1, - (X86loadpf32 addr:$src2)))]>; + (alignedloadf32 addr:$src2)))]>; def FsANDNPSrr : PSI<0x55, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), @@ -1084,7 +1081,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // disregarded. def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd {$src, $dst|$dst, $src}", - [(set FR64:$dst, (X86loadpf64 addr:$src))]>; + [(set FR64:$dst, (alignedloadf64 addr:$src))]>; // Alias bitwise logical operations using SSE logical ops on packed FP values. let isTwoAddress = 1 in { @@ -1103,15 +1100,15 @@ let isCommutable = 1 in { def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2), "andpd {$src2, $dst|$dst, $src2}", [(set FR64:$dst, (X86fand FR64:$src1, - (X86loadpf64 addr:$src2)))]>; + (alignedloadf64 addr:$src2)))]>; def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2), "orpd {$src2, $dst|$dst, $src2}", [(set FR64:$dst, (X86for FR64:$src1, - (X86loadpf64 addr:$src2)))]>; + (alignedloadf64 addr:$src2)))]>; def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2), "xorpd {$src2, $dst|$dst, $src2}", [(set FR64:$dst, (X86fxor FR64:$src1, - (X86loadpf64 addr:$src2)))]>; + (alignedloadf64 addr:$src2)))]>; def FsANDNPDrr : PDI<0x55, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), @@ -2631,11 +2628,11 @@ def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), (load addr:$src2))), (PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -// Unaligned load -def : Pat<(v4f32 (X86loadu addr:$src)), (MOVUPSrm addr:$src)>, - Requires<[HasSSE1]>; - // Use movaps / movups for SSE integer load / store (one byte shorter). +def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>; +def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>; def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>; def : Pat<(loadv2i64 addr:$src), |