 include/llvm/Target/TargetLowering.h     |  7
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 65
 lib/Target/ARM/ARMISelLowering.cpp       |  5
 lib/Target/ARM/ARMISelLowering.h         |  6
 test/CodeGen/ARM/ldst-f32-2-i32.ll       | 28
 5 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index c81d1f7..5141b7b 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -963,6 +963,13 @@ public:
return isTypeLegal(VT);
}
+ /// isDesirableToTransformToIntegerOp - Return true if it is profitable for
+ /// the dag combiner to transform a floating point op of the specified opcode
+ /// to an equivalent op of an integer type. e.g. f32 load -> i32 load can be
+ /// profitable on ARM.
+ virtual bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const {
+ return false;
+ }
+
/// IsDesirableToPromoteOp - This method queries the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.
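
As an illustration (not part of the patch itself), a backend opts into the new
hook by overriding it in its TargetLowering subclass. The sketch below assumes
a hypothetical MyTargetLowering and, unlike the ARM override later in this
diff, also enables f64:

  // Hypothetical backend opting into the integer load / store transform.
  bool MyTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
                                                           EVT VT) const {
    // Only plain loads and stores are worth rewriting as integer ops.
    if (Opc != ISD::LOAD && Opc != ISD::STORE)
      return false;
    // Assumption for illustration: both f32 and f64 are profitable here.
    return VT == MVT::f32 || VT == MVT::f64;
  }
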
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e91592b..dd7d56a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -42,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
namespace {
static cl::opt<bool>
@@ -234,6 +235,7 @@ namespace {
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -6111,6 +6113,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
}
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -6210,6 +6269,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+  // Try transforming a pair of floating point load / store ops to integer
+  // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
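
For context, the source-level shape this combine targets is a float value that
is loaded and immediately stored, with no other uses of the loaded value. A
minimal sketch (not part of the patch):

  // A float copied through memory, with the loaded value used only by the
  // store. With the new combine, a target such as ARM can select the f32
  // load / store pair as plain i32 ldr / str, so the value does not have to
  // pass through a floating point register.
  void copyFloat(float *Dst, const float *Src) {
    *Dst = *Src;
  }
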
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5055767..92ea6cb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -5724,6 +5724,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+ EVT VT) const {
+ return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
if (!Subtarget->allowsUnalignedMem())
return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 28bf60c..b06b8d3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -213,14 +213,16 @@ namespace llvm {
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
virtual const char *getTargetNodeName(unsigned Opcode) const;
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
/// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll
new file mode 100644
index 0000000..2d016f6
--- /dev/null
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
+; Check that the f32 load / store pair is optimized to an i32 load / store.
+; rdar://8944252
+
+define void @t(i32 %width, float* nocapture %src, float* nocapture %dst, i32 %index) nounwind {
+; CHECK: t:
+entry:
+ %src6 = bitcast float* %src to i8*
+ %0 = icmp eq i32 %width, 0
+ br i1 %0, label %return, label %bb
+
+bb:
+; CHECK: ldr [[REGISTER:(r[0-9]+)]], [r1], r3
+; CHECK: str [[REGISTER]], [r2], #4
+ %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
+ %tmp = mul i32 %j.05, %index
+ %uglygep = getelementptr i8* %src6, i32 %tmp
+ %src_addr.04 = bitcast i8* %uglygep to float*
+ %dst_addr.03 = getelementptr float* %dst, i32 %j.05
+ %1 = load float* %src_addr.04, align 4
+ store float %1, float* %dst_addr.03, align 4
+ %2 = add i32 %j.05, 1
+ %exitcond = icmp eq i32 %2, %width
+ br i1 %exitcond, label %return, label %bb
+
+return:
+ ret void
+}
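
For readability, the IR in this test corresponds roughly to the following loop
(a hand reconstruction; parameter names follow the IR, and the stride %index is
a byte offset, matching the i8* getelementptr):

  // Rough source-level equivalent of the test above (reconstructed by hand).
  void t(int width, float *src, float *dst, int index) {
    for (int j = 0; j != width; ++j)
      dst[j] = *reinterpret_cast<float *>(
                   reinterpret_cast<char *>(src) + j * index);
  }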