aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCameron Zwarich <zwarich@apple.com>2011-04-02 02:40:43 +0000
committerCameron Zwarich <zwarich@apple.com>2011-04-02 02:40:43 +0000
commit4071a711126a2a75585a32b96bb5d15ea267a915 (patch)
tree1e693a94ba4f9c310640ecc3540de493f3545a59
parented3caf90866e183380a06c0ae49101204a9f3c28 (diff)
downloadexternal_llvm-4071a711126a2a75585a32b96bb5d15ea267a915.zip
external_llvm-4071a711126a2a75585a32b96bb5d15ea267a915.tar.gz
external_llvm-4071a711126a2a75585a32b96bb5d15ea267a915.tar.bz2
Do some peephole optimizations to remove pointless VMOVs from Neon to integer
registers that arise from argument shuffling with the soft float ABI. These instructions are particularly slow on Cortex A8. This fixes one half of <rdar://problem/8674845>. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128759 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp31
-rw-r--r--test/CodeGen/ARM/fp-arg-shuffle.ll11
2 files changed, 42 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5502ca4..d11c956 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -5561,6 +5561,37 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ DCI.RemoveFromWorklist(LD);
+ DAG.DeleteNode(LD);
+ return Result;
+ }
+
return SDValue();
}
diff --git a/test/CodeGen/ARM/fp-arg-shuffle.ll b/test/CodeGen/ARM/fp-arg-shuffle.ll
new file mode 100644
index 0000000..59303ac
--- /dev/null
+++ b/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=arm -mattr=+neon -float-abi=soft | FileCheck %s
+
+; CHECK: function1
+; CHECK-NOT: vmov r
+define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
+entry:
+ %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
+ ret double %call
+}
+
+declare double @function2(double, double, double, double, double, double)