diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-08-11 07:55:09 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-08-11 07:55:09 +0000 |
commit | fac4a4eb7dfbfc90ae1d5c7d6c39a2d89a33c30e (patch) | |
tree | dd79127c979855b250e8b8651917f7142792cd48 /lib/Target/X86/X86InstrInfo.cpp | |
parent | 5b854f1ea55601790d9191c9720e77da35095340 (diff) | |
download | external_llvm-fac4a4eb7dfbfc90ae1d5c7d6c39a2d89a33c30e.zip external_llvm-fac4a4eb7dfbfc90ae1d5c7d6c39a2d89a33c30e.tar.gz external_llvm-fac4a4eb7dfbfc90ae1d5c7d6c39a2d89a33c30e.tar.bz2 |
AVX-512: Added VPERM* instructions and MOV* zmm-to-zmm instructions.
Added a test for shuffles using VPERM.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188147 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 49 |
1 files changed, 42 insertions, 7 deletions
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 0443a93..b773768 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -81,6 +81,7 @@ enum { TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT, TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT, TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT, + TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT, TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT }; @@ -1177,6 +1178,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PDEP64rr, X86::PDEP64rm, 0 }, { X86::PEXT32rr, X86::PEXT32rm, 0 }, { X86::PEXT64rr, X86::PEXT64rm, 0 }, + + // AVX-512 foldable instructions + { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, + { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, + { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, + { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, + { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, + { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { @@ -1454,6 +1463,8 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::VMOVDQAYrm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: + case X86::VMOVDQA32rm: + case X86::VMOVDQA64rm: return true; } } @@ -2890,12 +2901,15 @@ static bool isHReg(unsigned Reg) { // Try and copy between VR128/VR64 and GR64 registers. static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, - bool HasAVX) { + const X86Subtarget& Subtarget) { + + // SrcReg(VR128) -> DestReg(GR64) // SrcReg(VR64) -> DestReg(GR64) // SrcReg(GR64) -> DestReg(VR128) // SrcReg(GR64) -> DestReg(VR64) + bool HasAVX = Subtarget.hasAVX(); if (X86::GR64RegClass.contains(DestReg)) { if (X86::VR128RegClass.contains(SrcReg)) // Copy from a VR128 register to a GR64 register. 
@@ -2926,13 +2940,31 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, return 0; } +static +unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { + if (X86::VR128XRegClass.contains(DestReg, SrcReg) || + X86::VR256XRegClass.contains(DestReg, SrcReg) || + X86::VR512RegClass.contains(DestReg, SrcReg)) { + DestReg = get512BitSuperRegister(DestReg); + SrcReg = get512BitSuperRegister(SrcReg); + return X86::VMOVAPSZrr; + } + if ((X86::VK8RegClass.contains(DestReg) || + X86::VK16RegClass.contains(DestReg)) && + (X86::VK8RegClass.contains(SrcReg) || + X86::VK16RegClass.contains(SrcReg))) + return X86::KMOVWkk; + return 0; +} + void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // First deal with the normal symmetric copies. bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - unsigned Opc; + bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512(); + unsigned Opc = 0; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; else if (X86::GR32RegClass.contains(DestReg, SrcReg)) @@ -2950,14 +2982,17 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, "8-bit H register can not be copied outside GR8_NOREX"); } else Opc = X86::MOV8rr; - } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) + } + else if (X86::VR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MMX_MOVQ64rr; + else if (HasAVX512) + Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg); + else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = HasAVX ? 
X86::VMOVAPSrr : X86::MOVAPSrr; else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Opc = X86::VMOVAPSYrr; - else if (X86::VR64RegClass.contains(DestReg, SrcReg)) - Opc = X86::MMX_MOVQ64rr; - else - Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX); + if (!Opc) + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>()); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) |