diff options
author | Craig Topper <craig.topper@gmail.com> | 2011-11-14 06:46:21 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2011-11-14 06:46:21 +0000 |
commit | 3426a3efef546a20adf9e2064f589417ab89442b (patch) | |
tree | c15e887ec7c7ce3959692d55336a792d1e455d87 | |
parent | dc9205d9c29171f1ddcf2de7eb172a583cadbe63 (diff) | |
download | external_llvm-3426a3efef546a20adf9e2064f589417ab89442b.zip external_llvm-3426a3efef546a20adf9e2064f589417ab89442b.tar.gz external_llvm-3426a3efef546a20adf9e2064f589417ab89442b.tar.bz2 |
Add neverHasSideEffects, mayLoad, and mayStore to many patternless SSE/AVX instructions. Remove MMX check from LowerVECTOR_SHUFFLE since MMX vector types won't go through it anyway.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144522 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrSSE.td | 49 |
2 files changed, 37 insertions, 19 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cf03c30..5d16f47 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6623,7 +6623,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); - bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; @@ -6632,9 +6631,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); - // Shuffle operations on MMX not supported. - if (isMMX) - return Op; + assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); // Vector shuffle lowering takes 3 steps: // @@ -6646,7 +6643,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // so the shuffle can be broken into other shuffles and the legalizer can // try the lowering again. // - // The general ideia is that no vector_shuffle operation should be left to + // The general idea is that no vector_shuffle operation should be left to // be matched during isel, all of them must be converted to a target specific // node here. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 735a30f..caaf544 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -80,8 +80,9 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, string OpcodeStr, X86MemOperand x86memop, list<dag> pat_rr, list<dag> pat_rm, - bit Is2Addr = 1> { - let isCommutable = 1 in + bit Is2Addr = 1, + bit rr_hasSideEffects = 0> { + let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), @@ -2629,7 +2630,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, !strconcat(OpcodeStr, "ps"), f128mem, [], [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V; + (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, !strconcat(OpcodeStr, "pd"), f128mem, @@ -2926,12 +2927,15 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { + let neverHasSideEffects = 1 in { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + let mayLoad = 1 in def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + } def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat(OpcodeStr, @@ -3799,14 +3803,15 @@ let ExeDomain = SSEPackedInt in { (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "psrldq\t{$src2, $dst|$dst, $src2}", []>; // PSRADQri doesn't exist in SSE[1-3]. - } - def PANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; + def PANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", []>; - def PANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; + let mayLoad = 1 in + def PANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", []>; + } } } // Constraints = "$src1 = $dst" @@ -5348,6 +5353,7 @@ let Predicates = [HasAVX] in { //===---------------------------------------------------------------------===// multiclass ssse3_palign<string asm, bit Is2Addr = 1> { + let neverHasSideEffects = 1 in { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !if(Is2Addr, @@ -5355,6 +5361,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), []>, OpSize; + let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !if(Is2Addr, @@ -5362,19 +5369,23 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), []>, OpSize; + } } multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> { + let neverHasSideEffects = 1 in { def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; + let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; + } } let Predicates = [HasAVX] in @@ -5721,6 +5732,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>, OpSize; + let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, @@ -5743,6 +5755,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">; /// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { + let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8<opc, MRMDestMem, (outs), (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, @@ -6720,19 +6733,21 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } -let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in { +let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + let mayLoad = 1 in def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; } -let Defs = [XMM0, EFLAGS] in { +let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + let mayLoad = 1 in def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; @@ -6756,19 +6771,21 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { } let Predicates = [HasAVX], - Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + let mayLoad = 1 in def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; } -let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + let mayLoad = 1 in def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; @@ -7071,12 +7088,14 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), //===----------------------------------------------------------------------===// // Carry-less Multiplication instructions +let neverHasSideEffects = 1 in { let Constraints = "$src1 = $dst" in { def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; +let mayLoad = 1 in def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -7089,10 +7108,12 @@ def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; +let mayLoad = 1 in def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; +} multiclass pclmul_alias<string asm, int immop> { |