aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-01-02 19:57:05 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-01-02 19:57:05 +0000
commit80220369b040dd9769e4c4b65d2d018210c3b240 (patch)
tree86c89a65b154febbccfb8218b0b86dcda5252490
parent8c06aa1c597bb5c2c264a190bc35836f0a7b4cd5 (diff)
downloadexternal_llvm-80220369b040dd9769e4c4b65d2d018210c3b240.zip
external_llvm-80220369b040dd9769e4c4b65d2d018210c3b240.tar.gz
external_llvm-80220369b040dd9769e4c4b65d2d018210c3b240.tar.bz2
Try to reuse the value when lowering memset.
This allows us to compile: void test(char *s, int a) { __builtin_memset(s, a, 15); } into 1 mul + 3 stores instead of 3 muls + 3 stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122710 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp24
-rw-r--r--lib/Target/X86/README-X86-64.txt44
-rw-r--r--test/CodeGen/X86/memset-2.ll9
3 files changed, 30 insertions, 47 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b639b7c..110812c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3527,16 +3527,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
- unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+ // If this store is smaller than the largest store see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff),
isVol, false, Align);
OutChains.push_back(Store);
- DstOff += VTSize;
+ DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index 78c4dc0..e21d69a 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -41,50 +41,6 @@ saved a few instructions.
//===---------------------------------------------------------------------===//
-Poor codegen:
-
-int X[2];
-int b;
-void test(void) {
- memset(X, b, 2*sizeof(X[0]));
-}
-
-llc:
- movq _b@GOTPCREL(%rip), %rax
- movzbq (%rax), %rax
- movq %rax, %rcx
- shlq $8, %rcx
- orq %rax, %rcx
- movq %rcx, %rax
- shlq $16, %rax
- orq %rcx, %rax
- movq %rax, %rcx
- shlq $32, %rcx
- movq _X@GOTPCREL(%rip), %rdx
- orq %rax, %rcx
- movq %rcx, (%rdx)
- ret
-
-gcc:
- movq _b@GOTPCREL(%rip), %rax
- movabsq $72340172838076673, %rdx
- movzbq (%rax), %rax
- imulq %rdx, %rax
- movq _X@GOTPCREL(%rip), %rdx
- movq %rax, (%rdx)
- ret
-
-And the codegen is even worse for the following
-(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
- void fill1(char *s, int a)
- {
- __builtin_memset(s, a, 15);
- }
-
-For this version, we duplicate the computation of the constant to store.
-
-//===---------------------------------------------------------------------===//
-
It's not possible to reference AH, BH, CH, and DH registers in an instruction
requiring REX prefix. However, divb and mulb both produce results in AH. If isel
emits a CopyFromReg which gets turned into a movb and that can be allocated a
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 11f619c..ae6b6e9 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -28,3 +28,12 @@ entry:
; CHECK: imull $16843009
}
+define void @t4(i8* nocapture %s, i8 %a) nounwind {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
+ ret void
+; CHECK: t4:
+; CHECK: imull $16843009
+; CHECK-NOT: imul
+; CHECK: ret
+}