diff options
-rw-r--r-- | lib/Target/X86/X86TargetTransformInfo.cpp | 31 | ||||
-rw-r--r-- | test/Analysis/CostModel/X86/cast.ll | 75 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/X86/conversion-cost.ll | 9 |
3 files changed, 106 insertions, 9 deletions
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 3e3b86e..2336035 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -271,10 +271,33 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index c8d0f6f..b69b3bf 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -77,3 +77,78 @@ define i32 @masks4(<4 x i1> %in) { ret i32 undef } +define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { + ; CHECK: cost of 3 {{.*}} sitofp + %A1 = sitofp <4 x i1> %a to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %A2 = sitofp <4 x i1> %a to <4 x double> + + ; CHECK: cost of 3 {{.*}} sitofp + %B1 = sitofp <4 x i8> %b to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %B2 = sitofp <4 x i8> %b to <4 x double> + + ; CHECK: cost of 3 {{.*}} sitofp + %C1 = sitofp <4 x i16> %c to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %C2 = sitofp <4 x i16> %c to <4 x double> + + ; CHECK: cost of 1 {{.*}} sitofp + %D1 = sitofp <4 x i32> %d to <4 x float> + ; CHECK: cost of 1 {{.*}} sitofp + %D2 = sitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { + ; CHECK: cost of 8 {{.*}} sitofp + %A1 = sitofp <8 x i1> %a to <8 x float> + + ; CHECK: cost of 8 {{.*}} sitofp + %B1 = sitofp <8 x i8> %b to <8 x float> + + ; CHECK: cost of 5 {{.*}} sitofp + %C1 = sitofp <8 x i16> %c to <8 x float> + + ; CHECK: cost of 1 {{.*}} sitofp + %D1 = sitofp <8 x i32> %d to <8 x float> + ret void +} + +define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { + ; CHECK: cost of 7 {{.*}} uitofp + %A1 = uitofp <4 x i1> %a to <4 x float> + ; CHECK: cost of 7 {{.*}} uitofp + %A2 = uitofp <4 x i1> %a to <4 x double> + + ; CHECK: cost of 2 {{.*}} uitofp + %B1 = uitofp <4 x i8> %b to <4 x float> + ; CHECK: cost of 2 {{.*}} uitofp + %B2 = uitofp <4 x i8> %b to <4 x double> + + ; CHECK: cost of 2 {{.*}} uitofp + %C1 = uitofp <4 x i16> %c to <4 x float> + ; CHECK: cost of 2 {{.*}} uitofp + %C2 = uitofp <4 x i16> %c to <4 x double> + + ; CHECK: cost of 6 {{.*}} uitofp + %D1 = uitofp <4 x i32> %d to <4 x float> + ; CHECK: cost of 6 {{.*}} uitofp + %D2 = uitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { + ; CHECK: cost of 6 {{.*}} uitofp + %A1 = uitofp <8 x i1> %a to <8 x float> + + ; CHECK: cost of 5 {{.*}} uitofp + %B1 = uitofp <8 x i8> %b to <8 x float> + + ; CHECK: cost of 5 {{.*}} uitofp + %C1 = uitofp <8 x i16> %c to <8 x float> + + ; CHECK: cost of 9 {{.*}} uitofp + %D1 = uitofp <8 x i32> %d to <8 x float> + ret void +} diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 23d9233..760d28d 100644 --- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] - %2 = add nsw i64 %indvars.iv, 3 - %3 = trunc i64 %2 to i32 - %4 = sitofp i32 %3 to float - %5 = getelementptr inbounds float* %B, i64 %indvars.iv - store float %4, float* %5, align 4 + %add = add nsw i64 %indvars.iv, 3 + %tofp = sitofp i64 %add to float + %gep = getelementptr inbounds float* %B, i64 %indvars.iv + store float %tofp, float* %gep, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n |