author | Stephen Hines <srhines@google.com> | 2014-04-23 16:57:46 -0700 |
---|---|---|
committer | Stephen Hines <srhines@google.com> | 2014-04-24 15:53:16 -0700 |
commit | 36b56886974eae4f9c5ebc96befd3e7bfe5de338 | |
tree | e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/Target/X86/X86InstrInfo.cpp | |
parent | 69a8640022b04415ae9fac62f8ab090601d8f889 | |
Update to LLVM 3.5a.
Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 406 |
1 file changed, 207 insertions(+), 199 deletions(-)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2461773..6450f2a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -605,6 +605,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
     { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
     { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
+    { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
+    { X86::VPABSQZrr, X86::VPABSQZrm, 0 },

     // AES foldable instructions
     { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
@@ -1210,8 +1212,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::PEXT64rr, X86::PEXT64rm, 0 },

     // AVX-512 foldable instructions
-    { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
-    { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
     { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
     { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
     { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
@@ -1224,17 +1224,31 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
     { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
     { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+    { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
+    { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
     { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
     { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
+    { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
+    { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
+    { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
+    { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
+    { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
+    { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
+    { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
+    { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
+    { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
     { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
     { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
     { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
     { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
     { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+    { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
+    { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
     { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
     { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
     { X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
     { X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
+    { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },

     // AES foldable instructions
     { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
@@ -1268,119 +1282,111 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)

   static const X86OpTblEntry OpTbl3[] = {
     // FMA foldable instructions
-    { X86::VFMADDSSr231r, X86::VFMADDSSr231m, 0 },
-    { X86::VFMADDSDr231r, X86::VFMADDSDr231m, 0 },
-    { X86::VFMADDSSr132r, X86::VFMADDSSr132m, 0 },
-    { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 },
-    { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 },
-    { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 },
-    { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 },
-    { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 },
-
-    { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 },
-    { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 },
-    { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_16 },
-    { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_16 },
-    { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_16 },
-    { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_16 },
-    { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_32 },
-    { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_32 },
-    { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_32 },
-    { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 },
-    { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 },
-    { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 },
-
-    { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 },
-    { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 },
-    { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, 0 },
-    { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 },
-    { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 },
-    { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 },
-    { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 },
-    { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 },
-
-    { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 },
-    { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 },
-    { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_16 },
-    { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_16 },
-    { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_16 },
-    { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_16 },
-    { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_32 },
-    { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_32 },
-    { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_32 },
-    { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 },
-    { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 },
-    { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 },
-
-    { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 },
-    { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 },
-    { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, 0 },
-    { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 },
-    { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 },
-    { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 },
-    { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 },
-    { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 },
-
-    { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 },
-    { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 },
-    { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_16 },
-    { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_16 },
-    { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_16 },
-    { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_16 },
-    { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_32 },
-    { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_32 },
-    { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_32 },
-    { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 },
-    { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 },
-    { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 },
-
-    { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 },
-    { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 },
-    { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, 0 },
-    { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 },
-    { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 },
-    { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 },
-    { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 },
-    { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 },
-
-    { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 },
-    { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 },
-    { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_16 },
-    { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_16 },
-    { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_16 },
-    { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_16 },
-    { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_32 },
-    { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_32 },
-    { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_32 },
-    { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 },
-    { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 },
-    { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 },
-
-    { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 },
-    { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 },
-    { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_16 },
-    { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_16 },
-    { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_16 },
-    { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_16 },
-    { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_32 },
-    { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_32 },
-    { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_32 },
-    { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 },
-    { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 },
-    { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 },
-
-    { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 },
-    { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 },
-    { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_16 },
-    { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_16 },
-    { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_16 },
-    { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_16 },
-    { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_32 },
-    { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_32 },
-    { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_32 },
-    { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 },
-    { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 },
-    { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 },
+    { X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE },
+    { X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE },
+    { X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE },
+    { X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE },
+    { X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE },
+    { X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE },
+
+    { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE },
+    { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE },
+    { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_NONE },
+    { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_NONE },
+    { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_NONE },
+    { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_NONE },
+    { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_NONE },
+    { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_NONE },
+    { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_NONE },
+    { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_NONE },
+    { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_NONE },
+    { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE },
+
+    { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE },
+    { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE },
+    { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE },
+    { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE },
+    { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE },
+    { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE },
+
+    { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE },
+    { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE },
+    { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_NONE },
+    { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_NONE },
+    { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_NONE },
+    { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_NONE },
+    { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_NONE },
+    { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_NONE },
+    { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_NONE },
+    { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_NONE },
+    { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_NONE },
+    { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE },
+
+    { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE },
+    { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE },
+    { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE },
+    { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE },
+    { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE },
+    { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE },
+
+    { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE },
+    { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE },
+    { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_NONE },
+    { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_NONE },
+    { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_NONE },
+    { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_NONE },
+    { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_NONE },
+    { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_NONE },
+    { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_NONE },
+    { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_NONE },
+    { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_NONE },
+    { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE },
+
+    { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE },
+    { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE },
+    { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE },
+    { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE },
+    { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE },
+    { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE },
+
+    { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE },
+    { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE },
+    { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_NONE },
+    { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_NONE },
+    { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_NONE },
+    { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_NONE },
+    { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_NONE },
+    { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_NONE },
+    { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_NONE },
+    { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_NONE },
+    { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_NONE },
+    { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_NONE },
+
+    { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_NONE },
+    { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_NONE },
+
+    { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_NONE },
+    { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE },

     // FMA4 foldable patterns
     { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 },
@@ -1420,6 +1426,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
     { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
     { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
+    { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
+    { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
+    { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
+    { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 }
   };

   for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1536,8 +1546,8 @@ static bool isFrameLoadOpcode(int Opcode) {
   case X86::VMOVDQAYrm:
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
-  case X86::VMOVDQA32rm:
-  case X86::VMOVDQA64rm:
+  case X86::VMOVAPSZrm:
+  case X86::VMOVUPSZrm:
     return true;
   }
 }
@@ -1563,6 +1573,8 @@ static bool isFrameStoreOpcode(int Opcode) {
   case X86::VMOVAPSYmr:
   case X86::VMOVAPDYmr:
   case X86::VMOVDQAYmr:
+  case X86::VMOVUPSZmr:
+  case X86::VMOVAPSZmr:
   case X86::MMX_MOVD64mr:
   case X86::MMX_MOVQ64mr:
   case X86::MMX_MOVNTQmr:
@@ -1621,9 +1633,9 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
   if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
     return false;
   bool isPICBase = false;
-  for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
-         E = MRI.def_end(); I != E; ++I) {
-    MachineInstr *DefMI = I.getOperand().getParent();
+  for (MachineRegisterInfo::def_instr_iterator I = MRI.def_instr_begin(BaseReg),
+         E = MRI.def_instr_end(); I != E; ++I) {
+    MachineInstr *DefMI = &*I;
     if (DefMI->getOpcode() != X86::MOVPC32r)
       return false;
     assert(!isPICBase && "More than one PIC base?");
@@ -1809,7 +1821,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
     MBB.insert(I, MI);
   }

-  MachineInstr *NewMI = prior(I);
+  MachineInstr *NewMI = std::prev(I);
   NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg,
                             SubIdx, TRI);
 }
@@ -2452,6 +2464,41 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   }
 }

+bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+                                         unsigned &SrcOpIdx2) const {
+  switch (MI->getOpcode()) {
+    case X86::VFMADDPDr231r:
+    case X86::VFMADDPSr231r:
+    case X86::VFMADDSDr231r:
+    case X86::VFMADDSSr231r:
+    case X86::VFMSUBPDr231r:
+    case X86::VFMSUBPSr231r:
+    case X86::VFMSUBSDr231r:
+    case X86::VFMSUBSSr231r:
+    case X86::VFNMADDPDr231r:
+    case X86::VFNMADDPSr231r:
+    case X86::VFNMADDSDr231r:
+    case X86::VFNMADDSSr231r:
+    case X86::VFNMSUBPDr231r:
+    case X86::VFNMSUBPSr231r:
+    case X86::VFNMSUBSDr231r:
+    case X86::VFNMSUBSSr231r:
+    case X86::VFMADDPDr231rY:
+    case X86::VFMADDPSr231rY:
+    case X86::VFMSUBPDr231rY:
+    case X86::VFMSUBPSr231rY:
+    case X86::VFNMADDPDr231rY:
+    case X86::VFNMADDPSr231rY:
+    case X86::VFNMSUBPDr231rY:
+    case X86::VFNMSUBPSr231rY:
+      SrcOpIdx1 = 2;
+      SrcOpIdx2 = 3;
+      return true;
+    default:
+      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+  }
+}
+
 static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
   switch (BrOpc) {
   default: return X86::COND_INVALID;
@@ -2738,8 +2785,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     }

     // If the block has any instructions after a JMP, delete them.
-    while (llvm::next(I) != MBB.end())
-      llvm::next(I)->eraseFromParent();
+    while (std::next(I) != MBB.end())
+      std::next(I)->eraseFromParent();

     Cond.clear();
     FBB = 0;
@@ -3015,6 +3062,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
   return 0;
 }

+inline static bool MaskRegClassContains(unsigned Reg) {
+  return X86::VK8RegClass.contains(Reg) ||
+         X86::VK16RegClass.contains(Reg) ||
+         X86::VK1RegClass.contains(Reg);
+}
 static
 unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
   if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
@@ -3024,11 +3076,23 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
     SrcReg = get512BitSuperRegister(SrcReg);
     return X86::VMOVAPSZrr;
   }
-  if ((X86::VK8RegClass.contains(DestReg) ||
-       X86::VK16RegClass.contains(DestReg)) &&
-      (X86::VK8RegClass.contains(SrcReg) ||
-       X86::VK16RegClass.contains(SrcReg)))
+  if (MaskRegClassContains(DestReg) &&
+      MaskRegClassContains(SrcReg))
     return X86::KMOVWkk;
+  if (MaskRegClassContains(DestReg) &&
+      (X86::GR32RegClass.contains(SrcReg) ||
+       X86::GR16RegClass.contains(SrcReg) ||
+       X86::GR8RegClass.contains(SrcReg))) {
+    SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
+    return X86::KMOVWkr;
+  }
+  if ((X86::GR32RegClass.contains(DestReg) ||
+       X86::GR16RegClass.contains(DestReg) ||
+       X86::GR8RegClass.contains(DestReg)) &&
+      MaskRegClassContains(SrcReg)) {
+    DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
+    return X86::KMOVWrk;
+  }
   return 0;
 }
@@ -3837,6 +3901,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
   MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
   switch (MI->getOpcode()) {
+  case X86::MOV32r0:
+    return Expand2AddrUndef(MIB, get(X86::XOR32rr));
   case X86::SETB_C8r:
     return Expand2AddrUndef(MIB, get(X86::SBB8rr));
   case X86::SETB_C16r:
@@ -3861,6 +3927,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   case X86::TEST8ri_NOREX:
     MI->setDesc(get(X86::TEST8ri));
     return true;
+  case X86::KSET0B:
   case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
   case X86::KSET1B:
   case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
@@ -4198,75 +4265,10 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
   MI->addRegisterKilled(Reg, TRI, true);
 }

-static MachineInstr* foldPatchpoint(MachineFunction &MF,
-                                    MachineInstr *MI,
-                                    const SmallVectorImpl<unsigned> &Ops,
-                                    int FrameIndex,
-                                    const TargetInstrInfo &TII) {
-  unsigned StartIdx = 0;
-  switch (MI->getOpcode()) {
-  case TargetOpcode::STACKMAP:
-    StartIdx = 2; // Skip ID, nShadowBytes.
-    break;
-  case TargetOpcode::PATCHPOINT: {
-    // For PatchPoint, the call args are not foldable.
-    PatchPointOpers opers(MI);
-    StartIdx = opers.getVarIdx();
-    break;
-  }
-  default:
-    llvm_unreachable("unexpected stackmap opcode");
-  }
-
-  // Return false if any operands requested for folding are not foldable (not
-  // part of the stackmap's live values).
-  for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
-       I != E; ++I) {
-    if (*I < StartIdx)
-      return 0;
-  }
-
-  MachineInstr *NewMI =
-    MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true);
-  MachineInstrBuilder MIB(MF, NewMI);
-
-  // No need to fold return, the meta data, and function arguments
-  for (unsigned i = 0; i < StartIdx; ++i)
-    MIB.addOperand(MI->getOperand(i));
-
-  for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
-      assert(MO.getReg() && "patchpoint can only fold a vreg operand");
-      // Compute the spill slot size and offset.
-      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg());
-      unsigned SpillSize;
-      unsigned SpillOffset;
-      bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
-                                         SpillOffset, &MF.getTarget());
-      if (!Valid)
-        report_fatal_error("cannot spill patchpoint subregister operand");
-
-      MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp));
-      MIB.addOperand(MachineOperand::CreateImm(SpillSize));
-      MIB.addOperand(MachineOperand::CreateFI(FrameIndex));
-      addOffset(MIB, SpillOffset);
-    }
-    else
-      MIB.addOperand(MO);
-  }
-  return NewMI;
-}
-
 MachineInstr*
 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                                     const SmallVectorImpl<unsigned> &Ops,
                                     int FrameIndex) const {
-  // Special case stack map and patch point intrinsics.
-  if (MI->getOpcode() == TargetOpcode::STACKMAP
-      || MI->getOpcode() == TargetOpcode::PATCHPOINT) {
-    return foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
-  }
   // Check switch flag
   if (NoFusing) return NULL;

@@ -5172,7 +5174,13 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
   { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
   { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
   { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
-  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
+  { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
+  { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
+  { X86::VBROADCASTSSrr, X86::VBROADCASTSSrr, X86::VPBROADCASTDrr},
+  { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
+  { X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
+  { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
+  { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}
 };

 // FIXME: Some shuffle and unpack instructions have equivalents in different
@@ -5315,7 +5323,7 @@ namespace {
     static char ID;
     CGBR() : MachineFunctionPass(ID) {}

-    virtual bool runOnMachineFunction(MachineFunction &MF) {
+    bool runOnMachineFunction(MachineFunction &MF) override {
      const X86TargetMachine *TM =
        static_cast<const X86TargetMachine *>(&MF.getTarget());

@@ -5362,11 +5370,11 @@ namespace {
       return true;
     }

-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
      return "X86 PIC Global Base Reg Initialization";
     }

-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
@@ -5382,7 +5390,7 @@ namespace {
     static char ID;
     LDTLSCleanup() : MachineFunctionPass(ID) {}

-    virtual bool runOnMachineFunction(MachineFunction &MF) {
+    bool runOnMachineFunction(MachineFunction &MF) override {
      X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
      if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
        // No point folding accesses if there isn't at least two.
@@ -5475,11 +5483,11 @@ namespace {
       return Copy;
     }

-    virtual const char *getPassName() const {
+    const char *getPassName() const override {
      return "Local Dynamic TLS Access Clean-up";
     }

-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       AU.addRequired<MachineDominatorTree>();
       MachineFunctionPass::getAnalysisUsage(AU);
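
Most of this diff grows X86InstrInfo's opcode-folding tables (OpTbl2 and OpTbl3 above), which pair a register-register opcode with its register-memory twin so that a load from a stack slot or other address can be folded directly into the instruction. As a rough mental model only — the entry layout, the opcode values, and the `FoldTable`/`lookupFold` names below are invented for illustration and are not LLVM's actual API — the constructor loops visible in the diff (`for (unsigned i = 0, e = array_lengthof(OpTbl3); ...`) effectively build a map keyed on the register form:

```cpp
// Minimal sketch of an rr->rm folding-table lookup. Everything here is
// hypothetical; it only mirrors the shape of the tables in the diff above.
#include <cstdint>
#include <cstdio>
#include <unordered_map>

enum Opcode : unsigned { VPADDDZrr, VPADDDZrm, VPABSDZrr, VPABSDZrm };

struct FoldTableEntry {
  unsigned RegOp;  // register-register form
  unsigned MemOp;  // register-memory form
  uint16_t Flags;  // constraints, e.g. required memory alignment (0 = none)
};

static const FoldTableEntry FoldTable[] = {
  { VPADDDZrr, VPADDDZrm, 0 },
  { VPABSDZrr, VPABSDZrm, 0 },
};

// Built once at startup, like the array_lengthof loops in the constructor.
static const std::unordered_map<unsigned, FoldTableEntry> FoldMap = [] {
  std::unordered_map<unsigned, FoldTableEntry> M;
  for (const FoldTableEntry &E : FoldTable)
    M.emplace(E.RegOp, E);
  return M;
}();

// Returns the memory form for a register-register opcode, or -1 if the
// operand cannot be folded.
int lookupFold(unsigned RegOpcode) {
  auto It = FoldMap.find(RegOpcode);
  return It == FoldMap.end() ? -1 : static_cast<int>(It->second.MemOp);
}

int main() {
  std::printf("VPADDDZrr folds to opcode %d\n", lookupFold(VPADDDZrr));
}
```

The third field of each entry carries folding constraints such as a required memory-operand alignment; switching the FMA rows from `TB_ALIGN_16`/`TB_ALIGN_32` to `TB_ALIGN_NONE` records that these loads impose no alignment requirement, so folding becomes legal for unaligned addresses as well.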
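The new `findCommutedOpIndices` override tells the commuting machinery that, for the FMA `*231` forms, source operands 2 and 3 — the two multiplicands — may be swapped. A standalone sanity check of the underlying identity (illustrative C++, not LLVM code; the 231 form computes `dst = src2 * src3 + dst`, and the same argument applies to the negated and subtracting variants):

```cpp
#include <cmath>
#include <cstdio>

int main() {
  double acc = 1.5, src2 = 2.0, src3 = 3.0;
  // vfmadd231: acc = src2 * src3 + acc, in either multiplicand order.
  double a = std::fma(src2, src3, acc);
  double b = std::fma(src3, src2, acc);
  std::printf("%g == %g -> operands 2 and 3 are commutable\n", a, b);
}
```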