aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-11-02 23:27:16 +0000
committerNadav Rotem <nrotem@apple.com>2012-11-02 23:27:16 +0000
commit0c31e43ff3565f6b801793c916ba102050bcc128 (patch)
treee9da91f68f77c844ccecfa1269f3b2cb9df442de
parent4b5cde26232b3530362d2bfd94477f790a28a222 (diff)
downloadexternal_llvm-0c31e43ff3565f6b801793c916ba102050bcc128.zip
external_llvm-0c31e43ff3565f6b801793c916ba102050bcc128.tar.gz
external_llvm-0c31e43ff3565f6b801793c916ba102050bcc128.tar.bz2
Add a stub for the x86 cost model impl. Implement a basic cost rule for inserting/extracting from XMM registers.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167333 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/X86ISelLowering.h16
-rw-r--r--lib/Target/X86/X86TargetMachine.h4
-rw-r--r--test/Analysis/CostModel/X86/insert-extract-at-zero.ll33
3 files changed, 51 insertions, 2 deletions
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index c77d8b6..d4c3036 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -19,6 +19,7 @@
#include "X86RegisterInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetTransformImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -946,6 +947,21 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
}
+
+ class X86VectorTargetTransformInfo : public VectorTargetTransformImpl { // X86 override of the generic vector cost hooks; only getVectorInstrCost is specialized here.
+ public:
+ explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
+ VectorTargetTransformImpl(TL) {} // TL is forwarded unchanged to the base class.
+
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const { // Returns the cost of an element insert/extract on vector type Val at lane Index.
+ // A floating point element at index #0 is reported as free (cost 0) for every Opcode; presumably the scalar already lives in the low lane of the register -- confirm against the lowering.
+ if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
+ return 0;
+ return VectorTargetTransformImpl::getVectorInstrCost(Opcode, Val, Index); // Every other case defers to the generic cost.
+ }
+ };
+
}
#endif // X86ISELLOWERING_H
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 01296c3..12311a1 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -82,7 +82,7 @@ class X86_32TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
+ X86VectorTargetTransformInfo VTTI;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -119,7 +119,7 @@ class X86_64TargetMachine : public X86TargetMachine {
X86TargetLowering TLInfo;
X86JITInfo JITInfo;
ScalarTargetTransformImpl STTI;
- VectorTargetTransformImpl VTTI;
+ X86VectorTargetTransformInfo VTTI;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
diff --git a/test/Analysis/CostModel/X86/insert-extract-at-zero.ll b/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
new file mode 100644
index 0000000..eea5b60
--- /dev/null
+++ b/test/Analysis/CostModel/X86/insert-extract-at-zero.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @insert-extract-at-zero-idx(i32 %arg, float %fl) { ; Each instruction is preceded by the cost line expected from the cost-model analysis.
+ ;CHECK: cost of 0 {{.*}} extract
+ %A = extractelement <4 x float> undef, i32 0 ; float element, constant index 0: expected cost 0
+ ;CHECK: cost of 1 {{.*}} extract
+ %B = extractelement <4 x i32> undef, i32 0 ; integer element, index 0: expected cost 1
+ ;CHECK: cost of 1 {{.*}} extract
+ %C = extractelement <4 x float> undef, i32 1 ; float element, nonzero index: expected cost 1
+
+ ;CHECK: cost of 0 {{.*}} extract
+ %D = extractelement <8 x float> undef, i32 0 ; 8-lane float vector, index 0: expected cost 0
+ ;CHECK: cost of 1 {{.*}} extract
+ %E = extractelement <8 x float> undef, i32 1 ; 8-lane float vector, index 1: expected cost 1
+
+ ;CHECK: cost of 1 {{.*}} extract
+ %F = extractelement <8 x float> undef, i32 %arg ; index is the runtime value %arg: expected cost 1
+
+ ;CHECK: cost of 0 {{.*}} insert
+ %G = insertelement <4 x float> undef, float %fl, i32 0 ; float insert at index 0: expected cost 0
+ ;CHECK: cost of 1 {{.*}} insert
+ %H = insertelement <4 x float> undef, float %fl, i32 1 ; float insert at index 1: expected cost 1
+ ;CHECK: cost of 1 {{.*}} insert
+ %I = insertelement <4 x i32> undef, i32 %arg, i32 0 ; integer insert at index 0: expected cost 1
+
+ ;CHECK: cost of 0 {{.*}} insert
+ %J = insertelement <4 x double> undef, double undef, i32 0 ; double insert at index 0: expected cost 0
+
+ ret i32 0 ; the return value is a constant; only the per-instruction costs are checked
+}