about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp 11
-rw-r--r-- test/CodeGen/X86/vec_ctbits.ll 51
2 files changed, 56 insertions, 6 deletions
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 6feac0d..cffb0a1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -343,9 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
EVT NVT = Op.getValueType();
Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
- return DAG.getNode(ISD::SUB, dl, NVT, Op,
- DAG.getConstant(NVT.getSizeInBits() -
- OVT.getSizeInBits(), NVT));
+ return DAG.getNode(
+ ISD::SUB, dl, NVT, Op,
+ DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(),
+ NVT));
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
@@ -363,8 +364,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
- APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.setBit(OVT.getSizeInBits());
+ auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
+ OVT.getScalarSizeInBits());
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
}
return DAG.getNode(N->getOpcode(), dl, NVT, Op);
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index bddd535..0aa72b1 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
@@ -7,12 +7,61 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @footz(<2 x i64> %a) nounwind {
%c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
+
+; CHECK-LABEL: footz
+; CHECK: bsfq
+; CHECK: bsfq
}
define <2 x i64> @foolz(<2 x i64> %a) nounwind {
%c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
+
+; CHECK-LABEL: foolz
+; CHECK: bsrq
+; CHECK: xorq $63
+; CHECK: bsrq
+; CHECK: xorq $63
}
+
define <2 x i64> @foopop(<2 x i64> %a) nounwind {
%c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
ret <2 x i64> %c
}
+
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
+
+define <2 x i32> @promtz(<2 x i32> %a) nounwind {
+ %c = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
+ ret <2 x i32> %c
+
+; CHECK: .quad 4294967296
+; CHECK: .quad 4294967296
+; CHECK-LABEL: promtz
+; CHECK: bsfq
+; CHECK: cmov
+; CHECK: bsfq
+; CHECK: cmov
+}
+define <2 x i32> @promlz(<2 x i32> %a) nounwind {
+ %c = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+ ret <2 x i32> %c
+
+; CHECK: .quad 4294967295
+; CHECK: .quad 4294967295
+; CHECK: .quad 32
+; CHECK: .quad 32
+; CHECK-LABEL: promlz
+; CHECK: pand
+; CHECK: bsrq
+; CHECK: xorq $63
+; CHECK: bsrq
+; CHECK: xorq $63
+; CHECK: psub
+}
+
+define <2 x i32> @prompop(<2 x i32> %a) nounwind {
+ %c = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
+ ret <2 x i32> %c
+}