aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Scalar/InstructionCombining.cpp8
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll10
-rw-r--r--test/CodeGen/X86/iv-users-in-other-loops.ll22
-rw-r--r--test/Transforms/InstCombine/align-inc.ll5
4 files changed, 20 insertions, 25 deletions
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 7117bda..76b5566 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -9275,7 +9275,7 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
- unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+ unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2), DstAlign);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment()->getZExtValue();
@@ -11097,7 +11097,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Op);
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
if (KnownAlign >
(LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
LI.getAlignment()))
@@ -11376,7 +11377,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
}
// Attempt to improve the alignment.
- unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr);
+ unsigned KnownAlign =
+ GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
if (KnownAlign >
(SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
SI.getAlignment()))
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 774e724..6c0e76b 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -14,8 +14,8 @@ cond_true: ; preds = %cond_true, %entry
%k.0.0 = bitcast i32 %tmp.10 to i32 ; <i32> [#uses=2]
%tmp31 = add i32 %k.0.0, -1 ; <i32> [#uses=4]
%tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1]
- %tmp34 = bitcast i32* %tmp32 to i8* ; <i8*> [#uses=1]
- %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp34 ) ; <<16 x i8>> [#uses=1]
+ %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1]
+ %tmp = load <16 x i8>* %tmp34, align 1
%tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1]
%tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
%tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1]
@@ -23,8 +23,8 @@ cond_true: ; preds = %cond_true, %entry
%tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2]
%tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp62 = getelementptr i32* %ip, i32 %tmp31 ; <i32*> [#uses=1]
- %tmp65 = bitcast i32* %tmp62 to i8* ; <i8*> [#uses=1]
- %tmp66 = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* %tmp65 ) ; <<16 x i8>> [#uses=1]
+ %tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; <i8*> [#uses=1]
+ %tmp66 = load <16 x i8>* %tmp65, align 1
%tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1]
%tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
%tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1]
@@ -50,6 +50,4 @@ return: ; preds = %cond_true, %entry
ret void
}
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
-
declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
index 175f4c0..e8762bc 100644
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ b/test/CodeGen/X86/iv-users-in-other-loops.ll
@@ -160,23 +160,23 @@ bb9: ; preds = %bb9, %bb10.preheader
%B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec ; <i64> [#uses=2]
%B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum ; <float*> [#uses=1]
%A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum ; <float*> [#uses=1]
- %61 = bitcast float* %B_addr.438 to i8* ; <i8*> [#uses=1]
- %62 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %61) nounwind readonly ; <<4 x float>> [#uses=1]
+ %61 = bitcast float* %B_addr.438 to <4 x float>* ; <i8*> [#uses=1]
+ %62 = load <4 x float>* %61, align 1
%B_addr.438.sum169 = or i64 %A_addr.440.rec, 4 ; <i64> [#uses=1]
%B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169 ; <i64> [#uses=2]
%63 = getelementptr float* %B, i64 %B_addr.0.sum187 ; <float*> [#uses=1]
- %64 = bitcast float* %63 to i8* ; <i8*> [#uses=1]
- %65 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %64) nounwind readonly ; <<4 x float>> [#uses=1]
+ %64 = bitcast float* %63 to <4 x float>* ; <i8*> [#uses=1]
+ %65 = load <4 x float>* %64, align 1
%B_addr.438.sum168 = or i64 %A_addr.440.rec, 8 ; <i64> [#uses=1]
%B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168 ; <i64> [#uses=2]
%66 = getelementptr float* %B, i64 %B_addr.0.sum186 ; <float*> [#uses=1]
- %67 = bitcast float* %66 to i8* ; <i8*> [#uses=1]
- %68 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %67) nounwind readonly ; <<4 x float>> [#uses=1]
+ %67 = bitcast float* %66 to <4 x float>* ; <i8*> [#uses=1]
+ %68 = load <4 x float>* %67, align 1
%B_addr.438.sum167 = or i64 %A_addr.440.rec, 12 ; <i64> [#uses=1]
%B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167 ; <i64> [#uses=2]
%69 = getelementptr float* %B, i64 %B_addr.0.sum185 ; <float*> [#uses=1]
- %70 = bitcast float* %69 to i8* ; <i8*> [#uses=1]
- %71 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %70) nounwind readonly ; <<4 x float>> [#uses=1]
+ %70 = bitcast float* %69 to <4 x float>* ; <i8*> [#uses=1]
+ %71 = load <4 x float>* %70, align 1
%72 = bitcast float* %A_addr.440 to <4 x float>* ; <<4 x float>*> [#uses=1]
%73 = load <4 x float>* %72, align 16 ; <<4 x float>> [#uses=1]
%74 = mul <4 x float> %73, %62 ; <<4 x float>> [#uses=1]
@@ -214,8 +214,8 @@ bb11: ; preds = %bb11, %bb12.loopexit
%A_addr.529.rec = shl i64 %indvar, 2 ; <i64> [#uses=3]
%B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1]
%A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1]
- %95 = bitcast float* %B_addr.527 to i8* ; <i8*> [#uses=1]
- %96 = tail call <4 x float> @llvm.x86.sse.loadu.ps(i8* %95) nounwind readonly ; <<4 x float>> [#uses=1]
+ %95 = bitcast float* %B_addr.527 to <4 x float>* ; <i8*> [#uses=1]
+ %96 = load <4 x float>* %95, align 1
%97 = bitcast float* %A_addr.529 to <4 x float>* ; <<4 x float>*> [#uses=1]
%98 = load <4 x float>* %97, align 16 ; <<4 x float>> [#uses=1]
%99 = mul <4 x float> %98, %96 ; <<4 x float>> [#uses=1]
@@ -288,5 +288,3 @@ bb16: ; preds = %bb14, %bb13
store float %Sum0.2.lcssa, float* %C, align 4
ret void
}
-
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
diff --git a/test/Transforms/InstCombine/align-inc.ll b/test/Transforms/InstCombine/align-inc.ll
index 0ad01cb..104d991 100644
--- a/test/Transforms/InstCombine/align-inc.ll
+++ b/test/Transforms/InstCombine/align-inc.ll
@@ -3,12 +3,9 @@
@GLOBAL = internal global [4 x i32] zeroinitializer
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*)
-
-
define <16 x i8> @foo(<2 x i64> %x) {
entry:
- %tmp = tail call <16 x i8> @llvm.x86.sse2.loadu.dq( i8* bitcast ([4 x i32]* @GLOBAL to i8*) )
+ %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
ret <16 x i8> %tmp
}