From a1fe2948ed4039e68d1784494c3b23a4ce4126b4 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Tue, 25 Jun 2013 19:09:50 +0000 Subject: Fix SROA to avoid unnecessary scalar conversions for 1-element vectors. When a 1-element vector alloca is promoted, a store instruction can often be rewritten without converting the value to a scalar and using an insertelement instruction to stuff it into the new alloca. This patch just adds a check to skip that conversion when it is unnecessary. This turns out to be really important for some ARM Neon operations where <1 x i64> is used to get around the fact that i64 is not a legal type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184870 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 31 ++++++++++++++-------------- test/Transforms/ScalarRepl/vector_promote.ll | 24 +++++++++++++++++++++ 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d073e78..7fb1dbd 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2591,22 +2591,23 @@ private: bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) { - unsigned BeginIndex = getIndex(BeginOffset); - unsigned EndIndex = getIndex(EndOffset); - assert(EndIndex > BeginIndex && "Empty vector!"); - unsigned NumElements = EndIndex - BeginIndex; - assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - Type *PartitionTy - = (NumElements == 1) ? ElementTy - : VectorType::get(ElementTy, NumElements); - if (V->getType() != PartitionTy) - V = convertValue(TD, IRB, V, PartitionTy); - - // Mix in the existing elements. 
- Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - "load"); - V = insertVector(IRB, Old, V, BeginIndex, "vec"); + if (V->getType() != VecTy) { + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + Type *PartitionTy + = (NumElements == 1) ? ElementTy + : VectorType::get(ElementTy, NumElements); + if (V->getType() != PartitionTy) + V = convertValue(TD, IRB, V, PartitionTy); + // Mix in the existing elements. + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + "load"); + V = insertVector(IRB, Old, V, BeginIndex, "vec"); + } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll index 5c82ae4..03ef475 100644 --- a/test/Transforms/ScalarRepl/vector_promote.ll +++ b/test/Transforms/ScalarRepl/vector_promote.ll @@ -111,3 +111,27 @@ entry: ; CHECK-NOT: alloca ; CHECK: and i192 } + +; When promoting an alloca to a 1-element vector type, instructions that +; produce that same vector type should not be changed to insert one element +; into a new vector. +define <1 x i64> @test8(<1 x i64> %a) { +entry: + %a.addr = alloca <1 x i64>, align 8 + %__a = alloca <1 x i64>, align 8 + %tmp = alloca <1 x i64>, align 8 + store <1 x i64> %a, <1 x i64>* %a.addr, align 8 + %0 = load <1 x i64>* %a.addr, align 8 + store <1 x i64> %0, <1 x i64>* %__a, align 8 + %1 = load <1 x i64>* %__a, align 8 + %2 = bitcast <1 x i64> %1 to <8 x i8> + %3 = bitcast <8 x i8> %2 to <1 x i64> + %vshl_n = shl <1 x i64> %3, <i64 1> + store <1 x i64> %vshl_n, <1 x i64>* %tmp + %4 = load <1 x i64>* %tmp + ret <1 x i64> %4 +; CHECK: @test8 +; CHECK-NOT: alloca +; CHECK-NOT: insertelement +; CHECK: ret <1 x i64> +} -- cgit v1.1