about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Cameron McInally <cameron.mcinally@nyu.edu> 2013-10-31 13:56:31 +0000
committer Cameron McInally <cameron.mcinally@nyu.edu> 2013-10-31 13:56:31 +0000
commit 424f19732e948a854b454c70722a4eca8957bc20 (patch)
tree 632126a47cc5d0ab2e8ea22ef8e8dce469346494
parent f3280b5860734b367cdcdd9909a7f4f4645369fc (diff)
download external_llvm-424f19732e948a854b454c70722a4eca8957bc20.zip
external_llvm-424f19732e948a854b454c70722a4eca8957bc20.tar.gz
external_llvm-424f19732e948a854b454c70722a4eca8957bc20.tar.bz2
Add AVX512 unmasked integer broadcast intrinsics and support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193748 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- include/llvm/IR/IntrinsicsX86.td 12
-rw-r--r-- lib/Target/X86/X86InstrAVX512.td 10
-rw-r--r-- test/CodeGen/X86/avx512-intrinsics.ll 28
3 files changed, 50 insertions, 0 deletions
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 74df280..c24fb06 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -2690,6 +2690,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_vbroadcast_sd_pd_512 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_pbroadcastd_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastd_i32_512 :
+ Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_pbroadcastq_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastq_i64_512 :
+ Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
}
// Vector sign and zero extend
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 1ac5630..5e854da 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -425,6 +425,11 @@ def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
(VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
+def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
+ (VPBROADCASTDrZrr GR32:$src)>;
+def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
+ (VPBROADCASTQrZrr GR64:$src)>;
+
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
@@ -461,6 +466,11 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VT1>;
+def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
+ (VPBROADCASTDZrr VR128X:$src)>;
+def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
+ (VPBROADCASTQZrr VR128X:$src)>;
+
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
(VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 29b5083..8a51248 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -234,6 +234,34 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
+define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
+ ; CHECK: vpbroadcastd
+ %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
+
+define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
+ ; CHECK: vpbroadcastd
+ %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
+
+define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
+ ; CHECK: vpbroadcastq
+ %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
+
+define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
+ ; CHECK: vpbroadcastq
+ %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
+
define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK: vpmaxud
%res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]