diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 12 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/cmp.ll | 42 |
2 files changed, 34 insertions, 20 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 34ca24f..42d62b2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17866,10 +17866,10 @@ unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, { ISD::SETCC, MVT::v32i8, 1 }, }; - if (ST.hasSSE42()) { - int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); + if (ST.hasAVX2()) { + int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); if (Idx != -1) - return LT.first * SSE42CostTbl[Idx].Cost; + return LT.first * AVX2CostTbl[Idx].Cost; } if (ST.hasAVX()) { @@ -17878,10 +17878,10 @@ unsigned X86VectorTargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, return LT.first * AVX1CostTbl[Idx].Cost; } - if (ST.hasAVX2()) { - int Idx = FindInTable(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); + if (ST.hasSSE42()) { + int Idx = FindInTable(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); if (Idx != -1) - return LT.first * AVX2CostTbl[Idx].Cost; + return LT.first * SSE42CostTbl[Idx].Cost; } return VectorTargetTransformImpl::getCmpSelInstrCost(Opcode, ValTy, CondTy); diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll index 90b09c1..713b374 100644 --- a/test/Analysis/CostModel/X86/cmp.ll +++ b/test/Analysis/CostModel/X86/cmp.ll @@ -1,38 +1,52 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=AVX1 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=AVX2 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" define i32 @cmp(i32 %arg) { ; -- floats -- - ;CHECK: cost of 1 {{.*}} fcmp + ;AVX1: cost of 1 {{.*}} fcmp + ;AVX2: cost of 1 {{.*}} fcmp %A = fcmp olt <2 x float> undef, undef - ;CHECK: cost of 1 {{.*}} fcmp + ;AVX1: cost of 1 {{.*}} fcmp + ;AVX2: cost of 1 {{.*}} fcmp %B = fcmp olt <4 x float> undef, undef - ;CHECK: cost of 1 {{.*}} fcmp + ;AVX1: cost of 1 {{.*}} fcmp + ;AVX2: cost of 1 {{.*}} fcmp %C = fcmp olt <8 x float> undef, undef - ;CHECK: cost of 1 {{.*}} fcmp + ;AVX1: cost of 1 {{.*}} fcmp + ;AVX2: cost of 1 {{.*}} fcmp %D = fcmp olt <2 x double> undef, undef - ;CHECK: cost of 1 {{.*}} fcmp + ;AVX1: cost of 1 {{.*}} fcmp + ;AVX2: cost of 1 {{.*}} fcmp %E = fcmp olt <4 x double> undef, undef ; -- integers -- - ;CHECK: cost of 1 {{.*}} icmp + ;AVX1: cost of 1 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %F = icmp eq <16 x i8> undef, undef - ;CHECK: cost of 1 {{.*}} icmp + ;AVX1: cost of 1 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %G = icmp eq <8 x i16> undef, undef - ;CHECK: cost of 1 {{.*}} icmp + ;AVX1: cost of 1 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %H = icmp eq <4 x i32> undef, undef - ;CHECK: cost of 1 {{.*}} icmp + ;AVX1: cost of 1 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %I = icmp eq <2 x i64> undef, undef - ;CHECK: cost of 4 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %J = icmp eq <4 x i64> undef, undef - ;CHECK: cost of 4 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %K = icmp eq <8 x i32> undef, undef - ;CHECK: cost of 4 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %L = icmp eq <16 x i16> undef, undef - ;CHECK: cost of 4 {{.*}} icmp + ;AVX1: cost of 4 {{.*}} icmp + ;AVX2: cost of 1 {{.*}} icmp %M = icmp eq <32 x i8> undef, undef ;CHECK: cost of 0 {{.*}} ret |