diff options
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 18 | ||||
-rw-r--r-- | test/CodeGen/X86/avx512-select.ll | 22 |
3 files changed, 46 insertions, 2 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5dbef0f..7648ddc6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15750,6 +15750,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_V8F32: case X86::CMOV_V4F64: case X86::CMOV_V4I64: + case X86::CMOV_V16F32: + case X86::CMOV_V8F64: + case X86::CMOV_V8I64: case X86::CMOV_GR16: case X86::CMOV_GR32: case X86::CMOV_RFP32: @@ -16633,8 +16636,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); } - if (Subtarget->hasAVX512() && VT.isVector() && - Cond.getValueType().getVectorElementType() == MVT::i1) { + EVT CondVT = Cond.getValueType(); + if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() && + CondVT.getVectorElementType() == MVT::i1) { // v16i8 (select v16i1, v16i8, v16i8) does not have a proper // lowering on AVX-512. In this case we convert it to // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction. diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 9e1cdc9..7d10b67 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -884,6 +884,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { [(set VR256:$dst, (v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond, EFLAGS)))]>; + def CMOV_V8I64 : I<0, Pseudo, + (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond), + "#CMOV_V8I64 PSEUDO!", + [(set VR512:$dst, + (v8i64 (X86cmov VR512:$t, VR512:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V8F64 : I<0, Pseudo, + (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond), + "#CMOV_V8F64 PSEUDO!", + [(set VR512:$dst, + (v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond, + EFLAGS)))]>; + def CMOV_V16F32 : I<0, Pseudo, + (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond), + "#CMOV_V16F32 PSEUDO!", + [(set VR512:$dst, + (v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond, + EFLAGS)))]>; } diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll new file mode 100644 index 0000000..d2d6681 --- /dev/null +++ b/test/CodeGen/X86/avx512-select.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +; CHECK-LABEL: select00 +; CHECK: vmovaps +; CHECK-NEXT: LBB +define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind { + %cmpres = icmp eq i32 %a, 255 + %selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b + %res = xor <16 x i32> %b, %selres + ret <16 x i32> %res +} + +; CHECK-LABEL: select01 +; CHECK: vmovaps +; CHECK-NEXT: LBB +define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind { + %cmpres = icmp eq i32 %a, 255 + %selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b + %res = xor <8 x i64> %b, %selres + ret <8 x i64> %res +} + |