aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2010-12-04 20:32:23 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2010-12-04 20:32:23 +0000
commit1292c226458b68a119d3a387a0527f453b2065c2 (patch)
tree06a51ac1b82fa451fbf29a41395380f4510ce5f9
parenta6091be84c0ee838fb117b9cfb8eb9c156d44fad (diff)
downloadexternal_llvm-1292c226458b68a119d3a387a0527f453b2065c2.zip
external_llvm-1292c226458b68a119d3a387a0527f453b2065c2.tar.gz
external_llvm-1292c226458b68a119d3a387a0527f453b2065c2.tar.bz2
Add patterns for the x86 popcnt instruction.
- Also adds a new POPCNT subtarget feature that is currently enabled if the target supports SSE4.2 (nehalem) or SSE4A (barcelona). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120917 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86.td8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp14
-rw-r--r--lib/Target/X86/X86InstrSSE.td21
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--test/CodeGen/X86/popcnt.ll38
5 files changed, 70 insertions, 15 deletions
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 923f894..fa76619 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -23,6 +23,9 @@ include "llvm/Target/Target.td"
def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
"Enable conditional move instructions">;
+def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
+ "Support POPCNT instruction">;
+
def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
"Enable MMX instructions">;
@@ -45,7 +48,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
- [FeatureSSE41]>;
+ [FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions">;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
@@ -63,7 +66,8 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"IsUAMemFast", "true",
"Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
- "Support SSE 4a instructions">;
+ "Support SSE 4a instructions",
+ [FeaturePOPCNT]>;
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
"Enable AVX instructions">;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0bbf990..478bf71 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -285,21 +285,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
+ if (Subtarget->hasPOPCNT()) {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
+ } else {
+ setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
+ }
+
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 67cc72f..73a6202 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4603,22 +4603,25 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
//===----------------------------------------------------------------------===//
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
-let mayLoad = 1 in
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop GR16:$src))]>, OpSize, XS;
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+ "popcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctpop (loadi16 addr:$src)))]>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop GR32:$src))]>, XS;
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+ "popcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctpop (loadi32 addr:$src)))]>, XS;
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
-let mayLoad = 1 in
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop GR64:$src))]>, XS;
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+ "popcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctpop (loadi64 addr:$src)))]>, XS;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 2e73124..58cf3e0 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -65,6 +65,9 @@ protected:
///
bool HasX86_64;
+ /// HasPOPCNT - True if the processor supports POPCNT.
+ bool HasPOPCNT;
+
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
@@ -150,6 +153,7 @@ public:
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAVX() const { return HasAVX; }
bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; }
diff --git a/test/CodeGen/X86/popcnt.ll b/test/CodeGen/X86/popcnt.ll
new file mode 100644
index 0000000..430214c
--- /dev/null
+++ b/test/CodeGen/X86/popcnt.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=x86-64 -mattr=+popcnt < %s | FileCheck %s
+
+define i8 @cnt8(i8 %x) nounwind readnone {
+ %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
+ ret i8 %cnt
+; CHECK: cnt8:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i16 @cnt16(i16 %x) nounwind readnone {
+ %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
+ ret i16 %cnt
+; CHECK: cnt16:
+; CHECK: popcntw
+; CHECK: ret
+}
+
+define i32 @cnt32(i32 %x) nounwind readnone {
+ %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %cnt
+; CHECK: cnt32:
+; CHECK: popcntl
+; CHECK: ret
+}
+
+define i64 @cnt64(i64 %x) nounwind readnone {
+ %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %cnt
+; CHECK: cnt64:
+; CHECK: popcntq
+; CHECK: ret
+}
+
+declare i8 @llvm.ctpop.i8(i8) nounwind readnone
+declare i16 @llvm.ctpop.i16(i16) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone