aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp30
-rw-r--r--test/CodeGen/X86/2011-11-30-or.ll8
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-2.ll10
-rw-r--r--test/CodeGen/X86/sse2-blend.ll26
4 files changed, 51 insertions, 23 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cd07c2..4a17dc0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2427,6 +2427,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (and x, 0) -> 0, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+
+ // fold (and x, -1) -> x, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N0;
}
// fold (and x, undef) -> 0
@@ -3025,6 +3037,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (or x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
+
+ // fold (or x, -1) -> -1, vector edition
+ if (ISD::isBuildVectorAllOnes(N0.getNode()))
+ return N0;
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return N1;
}
// fold (or x, undef) -> -1
@@ -3334,6 +3358,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ // fold (xor x, 0) -> x, vector edition
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N0;
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index 0a949eb..f66248b 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -11,12 +11,12 @@ target triple = "x86_64-apple-macosx10.6.6"
define void @select_func() {
entry:
%c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
- %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
%and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
%and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
- %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
- %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
- %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+ %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %and.i.i.i = and <8 x i16> %neg.i.i.i.i, <i16 45, i16 15, i16 95, i16 8, i16 25, i16 65, i16 8, i16 25>
+ %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
+ %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
%a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
ret void
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9cf4607..6cd2761 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -43,21 +43,21 @@ forbody: ; preds = %forcond
%mul171.i = fmul <4 x float> %add167.i, %sub140.i ; <<4 x float>> [#uses=1]
%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 > ; <<4 x float>> [#uses=1]
%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32> ; <<4 x i32>> [#uses=1]
- %andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1]
+ %andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer ; <<4 x i32>> [#uses=1]
%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float> ; <<4 x float>> [#uses=1]
%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer ; <<4 x float>> [#uses=1]
%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32> ; <<4 x i32>> [#uses=1]
- %andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1]
+ %andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer ; <<4 x i32>> [#uses=1]
%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %orps203.i = or <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
+ %orps203.i = add <4 x i32> %andnps192.i, %xorps.i ; <<4 x i32>> [#uses=1]
%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float> ; <<4 x float>> [#uses=1]
%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1]
%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2]
- %andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
+ %andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %andnps.i17 = and <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1]
+ %andnps.i17 = add <4 x i32> zeroinitializer, %not.i16 ; <<4 x i32>> [#uses=1]
%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1]
%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1]
%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 2f4317b..67ce1be 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -28,33 +28,31 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
; CHECK: andnps
+; CHECK: andps
; CHECK: orps
; CHECK: ret
-define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
- %A = load <4 x i64>* %v1
- %B = load <4 x i64>* %v2
- %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
- store <4 x i64 > %vsel, <4 x i64>* %v1
+define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
+ %A = load <2 x i64>* %v1
+ %B = load <2 x i64>* %v2
+ %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
+ store <2 x i64 > %vsel, <2 x i64>* %v1
ret void
}
; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
; CHECK: andnps
+; CHECK: andps
; CHECK: orps
; CHECK: ret
-define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
- %A = load <4 x double>* %v1
- %B = load <4 x double>* %v2
- %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B
- store <4 x double > %vsel, <4 x double>* %v1
+define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
+ %A = load <2 x double>* %v1
+ %B = load <2 x double>* %v2
+ %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
+ store <2 x double > %vsel, <2 x double>* %v1
ret void
}