From 5f0d9dbdf48a9efe16bfadf88e5335f7b9a8ec3f Mon Sep 17 00:00:00 2001
From: Arnold Schwaighofer
Date: Sat, 2 Mar 2013 04:02:52 +0000
Subject: X86 cost model: Adjust cost for custom lowered vector multiplies

This matters for example in following matrix multiply:

int **mmult(int rows, int cols, int **m1, int **m2, int **m3) {
  int i, j, k, val;
  for (i=0; i
[... text lost in extraction: the remainder of the commit message and the beginning of the diff are missing here ...]
gets expanded to a <2 x i64> vector.
+  ; A <2 x i64> vector multiply is implemented using
+  ; 3 PMULUDQ and 2 PADDS and 4 shifts.
+  ;CHECK: cost of 9 {{.*}} mul
+  %A0 = mul <2 x i32> undef, undef
+  ;CHECK: cost of 9 {{.*}} mul
+  %A1 = mul <2 x i64> undef, undef
+  ;CHECK: cost of 18 {{.*}} mul
+  %A2 = mul <4 x i64> undef, undef
+  ret void
+}
+
+; SSE3: sse3mull
+define void @sse3mull() {
+  ; SSE3: cost of 6 {{.*}} mul
+  %A0 = mul <4 x i32> undef, undef
+  ret void
+  ; SSE3: avx2mull
+}
+
+; AVX2: avx2mull
+define void @avx2mull() {
+  ; AVX2: cost of 9 {{.*}} mul
+  %A0 = mul <4 x i64> undef, undef
+  ret void
+  ; AVX2: fmul
+}
+; CHECK: fmul
 define i32 @fmul(i32 %arg) {
   ;CHECK: cost of 1 {{.*}} fmul
   %A = fmul <4 x float> undef, undef
-- 
cgit v1.1