aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2011-11-14 06:46:21 +0000
committerCraig Topper <craig.topper@gmail.com>2011-11-14 06:46:21 +0000
commit3426a3efef546a20adf9e2064f589417ab89442b (patch)
treec15e887ec7c7ce3959692d55336a792d1e455d87
parentdc9205d9c29171f1ddcf2de7eb172a583cadbe63 (diff)
downloadexternal_llvm-3426a3efef546a20adf9e2064f589417ab89442b.zip
external_llvm-3426a3efef546a20adf9e2064f589417ab89442b.tar.gz
external_llvm-3426a3efef546a20adf9e2064f589417ab89442b.tar.bz2
Add neverHasSideEffects, mayLoad, and mayStore to many patternless SSE/AVX instructions. Remove MMX check from LowerVECTOR_SHUFFLE since MMX vector types won't go through it anyway.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144522 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp7
-rw-r--r--lib/Target/X86/X86InstrSSE.td49
2 files changed, 37 insertions, 19 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cf03c30..5d16f47 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6623,7 +6623,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
- bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
@@ -6632,9 +6631,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
- // Shuffle operations on MMX not supported.
- if (isMMX)
- return Op;
+ assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
// Vector shuffle lowering takes 3 steps:
//
@@ -6646,7 +6643,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// so the shuffle can be broken into other shuffles and the legalizer can
// try the lowering again.
//
- // The general ideia is that no vector_shuffle operation should be left to
+ // The general idea is that no vector_shuffle operation should be left to
// be matched during isel, all of them must be converted to a target specific
// node here.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 735a30f..caaf544 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -80,8 +80,9 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1> {
- let isCommutable = 1 in
+ bit Is2Addr = 1,
+ bit rr_hasSideEffects = 0> {
+ let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -2629,7 +2630,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2926,12 +2927,15 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1 in {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ let mayLoad = 1 in
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
@@ -3799,14 +3803,15 @@ let ExeDomain = SSEPackedInt in {
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}", []>;
// PSRADQri doesn't exist in SSE[1-3].
- }
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ let mayLoad = 1 in
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ }
}
} // Constraints = "$src1 = $dst"
@@ -5348,6 +5353,7 @@ let Predicates = [HasAVX] in {
//===---------------------------------------------------------------------===//
multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
@@ -5355,6 +5361,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>, OpSize;
+ let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!if(Is2Addr,
@@ -5362,19 +5369,23 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[]>, OpSize;
+ }
}
multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
+ let mayLoad = 1 in
def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2, i8imm:$src3),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
+ }
}
let Predicates = [HasAVX] in
@@ -5721,6 +5732,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
OpSize;
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
@@ -5743,6 +5755,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
@@ -6720,19 +6733,21 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
-let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
@@ -6756,19 +6771,21 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
}
let Predicates = [HasAVX],
- Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
@@ -7071,12 +7088,14 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
//===----------------------------------------------------------------------===//
// Carry-less Multiplication instructions
+let neverHasSideEffects = 1 in {
let Constraints = "$src1 = $dst" in {
def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
+let mayLoad = 1 in
def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -7089,10 +7108,12 @@ def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
+let mayLoad = 1 in
def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
+}
multiclass pclmul_alias<string asm, int immop> {