From 4a1c764264a8908aa041acf12f68cd8bcc2037b1 Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer <aschwaighofer@apple.com>
Date: Fri, 12 Jul 2013 19:16:04 +0000
Subject: ARM cost model: Add cost for gather/scatter

Fixes a 35% degradation compared to unvectorized code in
MiBench/automotive-susan and an equally serious regression on a private
image processing benchmark.

radar://14351991

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186188 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMTargetTransformInfo.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 79f56a4..5cc64de 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -426,6 +426,15 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 }
 
 unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+  // Address computations in vectorized code with non-consecutive addresses will
+  // likely result in more instructions compared to scalar code, where the
+  // computation can more often be merged into the addressing mode. The
+  // resulting extra micro-ops can significantly decrease throughput.
+  unsigned NumVectorInstToHideOverhead = 10;
+
+  if (Ty->isVectorTy() && IsComplex)
+    return NumVectorInstToHideOverhead;
+
   // In many cases the address computation is not merged into the instruction
   // addressing mode.
   return 1;
-- 
cgit v1.1