diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 24 | ||||
-rw-r--r-- | test/CodeGen/X86/avx-256.ll | 9 |
2 files changed, 22 insertions, 11 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index abfc6e2..0499138 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4673,24 +4673,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT ExtVT = VT.getVectorElementType(); unsigned NumElems = Op.getNumOperands(); - // All zero's: - // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX) - // All one's: - // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX) - if (ISD::isBuildVectorAllZeros(Op.getNode()) || - ISD::isBuildVectorAllOnes(Op.getNode())) { - // Canonicalize this to <4 x i32> or <8 x 32> (SSE) to - // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are - // eliminated on x86-32 hosts. + // Vectors containing all zeros can be matched by pxor and xorps later + if (ISD::isBuildVectorAllZeros(Op.getNode())) { + // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd + // and 2) ensure that i64 scalars are eliminated on x86-32 hosts. if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v8i32) return Op; - if (ISD::isBuildVectorAllOnes(Op.getNode())) - return getOnesVector(Op.getValueType(), DAG, dl); return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl); } + // Vectors containing all ones can be matched by pcmpeqd on 128-bit width + // vectors or broken into v4i32 operations on 256-bit vectors. + if (ISD::isBuildVectorAllOnes(Op.getNode())) { + if (Op.getValueType() == MVT::v4i32) + return Op; + + return getOnesVector(Op.getValueType(), DAG, dl); + } + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll index 244bf98..337f142 100644 --- a/test/CodeGen/X86/avx-256.ll +++ b/test/CodeGen/X86/avx-256.ll @@ -24,3 +24,12 @@ allocas: float>* %ptr2vec615, align 32 ret void } + +; CHECK: vpcmpeqd +; CHECK: vinsertf128 $1 +define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind { +allocas: + %ptr2vec615 = bitcast [0 x i32]* %RET to <8 x i32>* + store <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32>* %ptr2vec615, align 32 + ret void +} |