1 files changed, 19 insertions, 8 deletions
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 53ece66..34576ba 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -124,7 +124,7 @@ public:
 
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
 
-  unsigned getAddressComputationCost(Type *Val) const;
+  unsigned getAddressComputationCost(Type *Val, bool IsComplex) const;
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                   OperandValueKind Op1Info = OK_AnyValue,
@@ -411,12 +411,14 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
 
     EVT SelCondTy = TLI->getValueType(CondTy);
     EVT SelValTy = TLI->getValueType(ValTy);
-    int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
-                                          array_lengthof(NEONVectorSelectTbl),
-                                          ISD, SelCondTy.getSimpleVT(),
-                                          SelValTy.getSimpleVT());
-    if (Idx != -1)
-      return NEONVectorSelectTbl[Idx].Cost;
+    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+      int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
+                                            array_lengthof(NEONVectorSelectTbl),
+                                            ISD, SelCondTy.getSimpleVT(),
+                                            SelValTy.getSimpleVT());
+      if (Idx != -1)
+        return NEONVectorSelectTbl[Idx].Cost;
+    }
 
     std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
     return LT.first;
@@ -425,7 +427,16 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
   return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
 }
 
-unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+  // Address computations in vectorized code with non-consecutive addresses will
+  // likely result in more instructions compared to scalar code, where the
+  // computation can more often be merged into the addressing mode. The
+  // resulting extra micro-ops can significantly decrease throughput.
+  unsigned NumVectorInstToHideOverhead = 10;
+
+  if (Ty->isVectorTy() && IsComplex)
+    return NumVectorInstToHideOverhead;
+
   // In many cases the address computation is not merged into the instruction
   // addressing mode.
   return 1;