diff options
Diffstat (limited to 'test/Analysis/CostModel')
-rw-r--r-- | test/Analysis/CostModel/ARM/cast.ll | 112 | ||||
-rw-r--r-- | test/Analysis/CostModel/PowerPC/cmp-expanded.ll | 14 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/cast.ll | 43 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/cmp.ll | 11 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/sitofp.ll | 45 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/uitofp.ll | 12 |
6 files changed, 174 insertions, 63 deletions
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll index 662110f..18d6e84 100644 --- a/test/Analysis/CostModel/ARM/cast.ll +++ b/test/Analysis/CostModel/ARM/cast.ll @@ -221,35 +221,35 @@ define i32 @casts() { %r96 = fptoui <2 x float> undef to <2 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r97 = fptosi <2 x float> undef to <2 x i32> - ; CHECK: cost of 28 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r98 = fptoui <2 x float> undef to <2 x i64> - ; CHECK: cost of 28 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r99 = fptosi <2 x float> undef to <2 x i64> - ; CHECK: cost of 8 {{.*}} fptoui + ; CHECK: cost of 16 {{.*}} fptoui %r100 = fptoui <2 x double> undef to <2 x i1> - ; CHECK: cost of 8 {{.*}} fptosi + ; CHECK: cost of 16 {{.*}} fptosi %r101 = fptosi <2 x double> undef to <2 x i1> - ; CHECK: cost of 8 {{.*}} fptoui + ; CHECK: cost of 16 {{.*}} fptoui %r102 = fptoui <2 x double> undef to <2 x i8> - ; CHECK: cost of 8 {{.*}} fptosi + ; CHECK: cost of 16 {{.*}} fptosi %r103 = fptosi <2 x double> undef to <2 x i8> - ; CHECK: cost of 8 {{.*}} fptoui + ; CHECK: cost of 16 {{.*}} fptoui %r104 = fptoui <2 x double> undef to <2 x i16> - ; CHECK: cost of 8 {{.*}} fptosi + ; CHECK: cost of 16 {{.*}} fptosi %r105 = fptosi <2 x double> undef to <2 x i16> ; CHECK: cost of 2 {{.*}} fptoui %r106 = fptoui <2 x double> undef to <2 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r107 = fptosi <2 x double> undef to <2 x i32> - ; CHECK: cost of 28 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r108 = fptoui <2 x double> undef to <2 x i64> - ; CHECK: cost of 28 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r109 = fptosi <2 x double> undef to <2 x i64> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r110 = fptoui <4 x float> undef to <4 x i1> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r111 = fptosi <4 x float> undef to <4 x i1> ; CHECK: cost of 3 {{.*}} fptoui %r112 = fptoui <4 x float> 
undef to <4 x i8> @@ -263,39 +263,39 @@ define i32 @casts() { %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK: cost of 1 {{.*}} fptosi %r117 = fptosi <4 x float> undef to <4 x i32> - ; CHECK: cost of 56 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r118 = fptoui <4 x float> undef to <4 x i64> - ; CHECK: cost of 56 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r119 = fptosi <4 x float> undef to <4 x i64> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r120 = fptoui <4 x double> undef to <4 x i1> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r121 = fptosi <4 x double> undef to <4 x i1> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r122 = fptoui <4 x double> undef to <4 x i8> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r123 = fptosi <4 x double> undef to <4 x i8> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r124 = fptoui <4 x double> undef to <4 x i16> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r125 = fptosi <4 x double> undef to <4 x i16> - ; CHECK: cost of 16 {{.*}} fptoui + ; CHECK: cost of 32 {{.*}} fptoui %r126 = fptoui <4 x double> undef to <4 x i32> - ; CHECK: cost of 16 {{.*}} fptosi + ; CHECK: cost of 32 {{.*}} fptosi %r127 = fptosi <4 x double> undef to <4 x i32> - ; CHECK: cost of 56 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r128 = fptoui <4 x double> undef to <4 x i64> - ; CHECK: cost of 56 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r129 = fptosi <4 x double> undef to <4 x i64> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r130 = fptoui <8 x float> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r131 = fptosi <8 x float> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r132 = fptoui <8 x float> undef to <8 x i8> - ; CHECK: cost of 32 {{.*}} fptosi + ; 
CHECK: cost of 64 {{.*}} fptosi %r133 = fptosi <8 x float> undef to <8 x i8> ; CHECK: cost of 4 {{.*}} fptoui %r134 = fptoui <8 x float> undef to <8 x i16> @@ -305,39 +305,39 @@ define i32 @casts() { %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK: cost of 2 {{.*}} fptosi %r137 = fptosi <8 x float> undef to <8 x i32> - ; CHECK: cost of 112 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r138 = fptoui <8 x float> undef to <8 x i64> - ; CHECK: cost of 112 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r139 = fptosi <8 x float> undef to <8 x i64> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r140 = fptoui <8 x double> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r141 = fptosi <8 x double> undef to <8 x i1> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r142 = fptoui <8 x double> undef to <8 x i8> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r143 = fptosi <8 x double> undef to <8 x i8> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r144 = fptoui <8 x double> undef to <8 x i16> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r145 = fptosi <8 x double> undef to <8 x i16> - ; CHECK: cost of 32 {{.*}} fptoui + ; CHECK: cost of 64 {{.*}} fptoui %r146 = fptoui <8 x double> undef to <8 x i32> - ; CHECK: cost of 32 {{.*}} fptosi + ; CHECK: cost of 64 {{.*}} fptosi %r147 = fptosi <8 x double> undef to <8 x i32> - ; CHECK: cost of 112 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r148 = fptoui <8 x double> undef to <8 x i64> - ; CHECK: cost of 112 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r149 = fptosi <8 x double> undef to <8 x i64> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r150 = fptoui <16 x float> undef to <16 x i1> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r151 = fptosi <16 x float> undef to <16 x i1> - ; 
CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r152 = fptoui <16 x float> undef to <16 x i8> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r153 = fptosi <16 x float> undef to <16 x i8> ; CHECK: cost of 8 {{.*}} fptoui %r154 = fptoui <16 x float> undef to <16 x i16> @@ -347,30 +347,30 @@ define i32 @casts() { %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK: cost of 4 {{.*}} fptosi %r157 = fptosi <16 x float> undef to <16 x i32> - ; CHECK: cost of 224 {{.*}} fptoui + ; CHECK: cost of 256 {{.*}} fptoui %r158 = fptoui <16 x float> undef to <16 x i64> - ; CHECK: cost of 224 {{.*}} fptosi + ; CHECK: cost of 256 {{.*}} fptosi %r159 = fptosi <16 x float> undef to <16 x i64> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r160 = fptoui <16 x double> undef to <16 x i1> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r161 = fptosi <16 x double> undef to <16 x i1> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r162 = fptoui <16 x double> undef to <16 x i8> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r163 = fptosi <16 x double> undef to <16 x i8> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r164 = fptoui <16 x double> undef to <16 x i16> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r165 = fptosi <16 x double> undef to <16 x i16> - ; CHECK: cost of 64 {{.*}} fptoui + ; CHECK: cost of 128 {{.*}} fptoui %r166 = fptoui <16 x double> undef to <16 x i32> - ; CHECK: cost of 64 {{.*}} fptosi + ; CHECK: cost of 128 {{.*}} fptosi %r167 = fptosi <16 x double> undef to <16 x i32> - ; CHECK: cost of 224 {{.*}} fptoui + ; CHECK: cost of 256 {{.*}} fptoui %r168 = fptoui <16 x double> undef to <16 x i64> - ; CHECK: cost of 224 {{.*}} fptosi + ; CHECK: cost of 256 {{.*}} fptosi %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK: cost of 8 {{.*}} uitofp diff --git 
a/test/Analysis/CostModel/PowerPC/cmp-expanded.ll b/test/Analysis/CostModel/PowerPC/cmp-expanded.ll new file mode 100644 index 0000000..38c8439 --- /dev/null +++ b/test/Analysis/CostModel/PowerPC/cmp-expanded.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @exts() { + + ; VSX is disabled, so this cost needs to include scalarization (because + ; <4 x double> is legalized to scalars). + ; CHECK: cost of 44 {{.*}} fcmp + %v1 = fcmp ugt <4 x double> undef, undef + + ret void +} + diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index 7f97b17..fb16af6 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -1,3 +1,4 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX @@ -83,6 +84,19 @@ define i32 @zext_sext(<8 x i1> %in) { ;CHECK-AVX: cost of 4 {{.*}} zext %D = zext <4 x i32> undef to <4 x i64> + ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext + %D1 = zext <16 x i32> undef to <16 x i64> + + ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext + %D2 = sext <16 x i32> undef to <16 x i64> + + ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext + %D3 = zext <16 x i16> undef to <16 x i32> + ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext + %D4 = zext <16 x i8> undef to <16 x i32> + ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext + %D5 = zext <16 x i1> undef to <16 x i32> + ;CHECK-AVX2: cost of 2 {{.*}} trunc ;CHECK-AVX: cost of 4 {{.*}} trunc %E = trunc <4 
x i64> undef to <4 x i32> @@ -101,8 +115,12 @@ define i32 @zext_sext(<8 x i1> %in) { ;CHECK-AVX2: cost of 4 {{.*}} trunc ;CHECK-AVX: cost of 9 {{.*}} trunc + ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc %G = trunc <8 x i64> undef to <8 x i32> + ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc + %G1 = trunc <16 x i64> undef to <16 x i32> + ret i32 undef } @@ -207,7 +225,30 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { ; CHECK: cost of 5 {{.*}} uitofp %C1 = uitofp <8 x i16> %c to <8 x float> - ; CHECK: cost of 9 {{.*}} uitofp + ; CHECK-AVX2: cost of 8 {{.*}} uitofp + ; CHECK-AVX512: cost of 8 {{.*}} uitofp + ; CHECK-AVX: cost of 9 {{.*}} uitofp %D1 = uitofp <8 x i32> %d to <8 x float> ret void } + +define void @fp_conv(<8 x float> %a, <16 x float>%b) { +;CHECK-LABEL: for function 'fp_conv' + ; CHECK-AVX512: cost of 1 {{.*}} fpext + %A1 = fpext <8 x float> %a to <8 x double> + + ; CHECK-AVX512: cost of 3 {{.*}} fpext + %A2 = fpext <16 x float> %b to <16 x double> + + ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext + ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext + %A3 = fpext <8 x float> %a to <8 x double> + + ; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc + ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc + %A4 = fptrunc <8 x double> undef to <8 x float> + + ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc + %A5 = fptrunc <16 x double> undef to <16 x float> + ret void +} diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll index 9f2bdb3..469cd73 100644 --- a/test/Analysis/CostModel/X86/cmp.ll +++ b/test/Analysis/CostModel/X86/cmp.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck 
-check-prefix=CHECK -check-prefix=AVX512 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -22,6 +23,11 @@ define i32 @cmp(i32 %arg) { ;AVX2: cost of 1 {{.*}} fcmp %E = fcmp olt <4 x double> undef, undef + ; AVX512: cost of 1 {{.*}} %E1 = fcmp + %E1 = fcmp olt <16 x float> undef, undef + ; AVX512: cost of 2 {{.*}} %E2 = fcmp + %E2 = fcmp olt <16 x double> undef, undef + ; -- integers -- ;AVX1: cost of 1 {{.*}} icmp @@ -49,6 +55,11 @@ define i32 @cmp(i32 %arg) { ;AVX2: cost of 1 {{.*}} icmp %M = icmp eq <32 x i8> undef, undef + ; AVX512: cost of 1 {{.*}} %M1 = icmp + %M1 = icmp eq <16 x i32> undef, undef + ; AVX512: cost of 2 {{.*}} %M2 = icmp + %M2 = icmp eq <16 x i64> undef, undef + ;CHECK: cost of 0 {{.*}} ret ret i32 undef } diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll index 338d974..edc937e 100644 --- a/test/Analysis/CostModel/X86/sitofp.ll +++ b/test/Analysis/CostModel/X86/sitofp.ll @@ -1,4 +1,5 @@ ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s +; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) { ; SSE2: sitofpv2i8v2double @@ -279,3 +280,47 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) { %1 = sitofp <32 x i64> %a to <32 x float> ret <32 x float> %1 } + +; AVX512F-LABEL: sitofp_16i8_float +; AVX512F: cost of 2 {{.*}} sitofp +define <16 x float> @sitofp_16i8_float(<16 x i8> %a) { + %1 = sitofp <16 x i8> %a to <16 x float> + ret <16 x float> %1 +} + +define <16 x float> @sitofp_16i16_float(<16 x i16> %a) { + ; AVX512F-LABEL: sitofp_16i16_float + ; AVX512F: cost of 2 {{.*}} sitofp + %1 = sitofp <16 x i16> %a to <16 x float> + ret <16 x float> %1 +} + +; 
AVX512F-LABEL: sitofp_8i8_double +; AVX512F: cost of 2 {{.*}} sitofp +define <8 x double> @sitofp_8i8_double(<8 x i8> %a) { + %1 = sitofp <8 x i8> %a to <8 x double> + ret <8 x double> %1 +} + +; AVX512F-LABEL: sitofp_8i16_double +; AVX512F: cost of 2 {{.*}} sitofp +define <8 x double> @sitofp_8i16_double(<8 x i16> %a) { + %1 = sitofp <8 x i16> %a to <8 x double> + ret <8 x double> %1 +} + +; AVX512F-LABEL: sitofp_8i1_double +; AVX512F: cost of 4 {{.*}} sitofp +define <8 x double> @sitofp_8i1_double(<8 x double> %a) { + %cmpres = fcmp ogt <8 x double> %a, zeroinitializer + %1 = sitofp <8 x i1> %cmpres to <8 x double> + ret <8 x double> %1 +} + +; AVX512F-LABEL: sitofp_16i1_float +; AVX512F: cost of 3 {{.*}} sitofp +define <16 x float> @sitofp_16i1_float(<16 x float> %a) { + %cmpres = fcmp ogt <16 x float> %a, zeroinitializer + %1 = sitofp <16 x i1> %cmpres to <16 x float> + ret <16 x float> %1 +} diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll index a41a04d..27ec268 100644 --- a/test/Analysis/CostModel/X86/uitofp.ll +++ b/test/Analysis/CostModel/X86/uitofp.ll @@ -235,7 +235,7 @@ define <2 x float> @uitofpv2i8v2float(<2 x i8> %a) { define <4 x float> @uitofpv4i8v4float(<4 x i8> %a) { ; SSE2: uitofpv4i8v4float - ; SSE2: cost of 15 {{.*}} uitofp + ; SSE2: cost of 8 {{.*}} uitofp %1 = uitofp <4 x i8> %a to <4 x float> ret <4 x float> %1 } @@ -270,7 +270,7 @@ define <2 x float> @uitofpv2i16v2float(<2 x i16> %a) { define <4 x float> @uitofpv4i16v4float(<4 x i16> %a) { ; SSE2: uitofpv4i16v4float - ; SSE2: cost of 15 {{.*}} uitofp + ; SSE2: cost of 8 {{.*}} uitofp %1 = uitofp <4 x i16> %a to <4 x float> ret <4 x float> %1 } @@ -305,28 +305,28 @@ define <2 x float> @uitofpv2i32v2float(<2 x i32> %a) { define <4 x float> @uitofpv4i32v4float(<4 x i32> %a) { ; SSE2: uitofpv4i32v4float - ; SSE2: cost of 15 {{.*}} uitofp + ; SSE2: cost of 8 {{.*}} uitofp %1 = uitofp <4 x i32> %a to <4 x float> ret <4 x float> %1 } define <8 x float> 
@uitofpv8i32v8float(<8 x i32> %a) { ; SSE2: uitofpv8i32v8float - ; SSE2: cost of 30 {{.*}} uitofp + ; SSE2: cost of 16 {{.*}} uitofp %1 = uitofp <8 x i32> %a to <8 x float> ret <8 x float> %1 } define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) { ; SSE2: uitofpv16i32v16float - ; SSE2: cost of 60 {{.*}} uitofp + ; SSE2: cost of 32 {{.*}} uitofp %1 = uitofp <16 x i32> %a to <16 x float> ret <16 x float> %1 } define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) { ; SSE2: uitofpv32i32v32float - ; SSE2: cost of 120 {{.*}} uitofp + ; SSE2: cost of 64 {{.*}} uitofp %1 = uitofp <32 x i32> %a to <32 x float> ret <32 x float> %1 } |