about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Chris Lattner <sabre@nondot.org> 2009-12-24 01:07:17 +0000
committer: Chris Lattner <sabre@nondot.org> 2009-12-24 01:07:17 +0000
commit: 04b091a7822c60512285ee66b058f98399bf1cf9 (patch)
tree: 2898e7bc54044d523c88070d7e14ea1c7eb91ed3
parent: 7ed6dd61ac904f6a50318f557ac0f389a4dbf6a9 (diff)
download: external_llvm-04b091a7822c60512285ee66b058f98399bf1cf9.zip
external_llvm-04b091a7822c60512285ee66b058f98399bf1cf9.tar.gz
external_llvm-04b091a7822c60512285ee66b058f98399bf1cf9.tar.bz2
Handle equality memcmp of 8 bytes on x86-64 with two unaligned loads and a
compare. On other targets we end up with a call to memcmp because we don't want 16 individual byte loads. We should be able to use movups as well (for the 16-byte case), but we're failing to select the generated icmp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92107 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 74
-rw-r--r-- test/CodeGen/X86/memcmp.ll 42
2 files changed, 93 insertions, 23 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e194003..8fe7c45 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5092,17 +5092,8 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
return true;
}
-static SDValue getMemCmpLoad(Value *PtrVal, unsigned Size,
+static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy,
SelectionDAGBuilder &Builder) {
- MVT LoadVT;
- const Type *LoadTy;
- if (Size == 2) {
- LoadVT = MVT::i16;
- LoadTy = Type::getInt16Ty(PtrVal->getContext());
- } else {
- LoadVT = MVT::i32;
- LoadTy = Type::getInt32Ty(PtrVal->getContext());
- }
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
@@ -5158,16 +5149,61 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
- if (Size && (Size->getValue() == 2 || Size->getValue() == 4) &&
- IsOnlyUsedInZeroEqualityComparison(&I)) {
- SDValue LHSVal = getMemCmpLoad(LHS, Size->getZExtValue(), *this);
- SDValue RHSVal = getMemCmpLoad(RHS, Size->getZExtValue(), *this);
+ if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+ bool ActuallyDoIt = true;
+ MVT LoadVT;
+ const Type *LoadTy;
+ switch (Size->getZExtValue()) {
+ default:
+ LoadVT = MVT::Other;
+ LoadTy = 0;
+ ActuallyDoIt = false;
+ break;
+ case 2:
+ LoadVT = MVT::i16;
+ LoadTy = Type::getInt16Ty(Size->getContext());
+ break;
+ case 4:
+ LoadVT = MVT::i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ break;
+ case 8:
+ LoadVT = MVT::i64;
+ LoadTy = Type::getInt64Ty(Size->getContext());
+ break;
+ /*
+ case 16:
+ LoadVT = MVT::v4i32;
+ LoadTy = Type::getInt32Ty(Size->getContext());
+ LoadTy = VectorType::get(LoadTy, 4);
+ break;
+ */
+ }
+
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+
+ // Require that we can find a legal MVT, and only do this if the target
+ // supports unaligned loads of that type. Expanding into byte loads would
+ // bloat the code.
+ if (ActuallyDoIt && Size->getZExtValue() > 4) {
+ // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+ // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+ if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
+ ActuallyDoIt = false;
+ }
- SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
- ISD::SETNE);
- EVT CallVT = TLI.getValueType(I.getType(), true);
- setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
- return true;
+ if (ActuallyDoIt) {
+ SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+ SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+ SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+ ISD::SETNE);
+ EVT CallVT = TLI.getValueType(I.getType(), true);
+ setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+ return true;
+ }
}
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index f6086c4..b90d2e2 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -3,7 +3,7 @@
; This tests codegen time inlining/optimization of memcmp
; rdar://6480398
-@.str = private constant [6 x i8] c"fooxx\00", align 1 ; <[6 x i8]*> [#uses=1]
+@.str = private constant [23 x i8] c"fooooooooooooooooooooo\00", align 1 ; <[23 x i8]*> [#uses=1]
declare i32 @memcmp(...)
@@ -26,7 +26,7 @@ return: ; preds = %entry
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -60,7 +60,7 @@ return: ; preds = %entry
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -71,6 +71,40 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp4a:
-; CHECK: cmpl $2021158767, (%rdi)
+; CHECK: cmpl $1869573999, (%rdi)
+}
+
+define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+entry:
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %return, label %bb
+
+bb: ; preds = %entry
+ store i32 4, i32* %P, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+; CHECK: memcmp8:
+; CHECK: movq (%rsi), %rax
+; CHECK: cmpq %rax, (%rdi)
+}
+
+define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
+entry:
+ %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %return, label %bb
+
+bb: ; preds = %entry
+ store i32 4, i32* %P, align 4
+ ret void
+
+return: ; preds = %entry
+ ret void
+; CHECK: memcmp8a:
+; CHECK: movabsq $8029759185026510694, %rax
+; CHECK: cmpq %rax, (%rdi)
}