Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
 lib/Target/X86/X86InstrInfo.cpp | 464
 1 file changed, 332 insertions(+), 132 deletions(-)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0d3afc4..7f87bdd 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -100,8 +101,8 @@ void X86InstrInfo::anchor() {}
X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86GenInstrInfo(
- (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
- (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
Subtarget(STI), RI(STI) {
static const X86OpTblEntry OpTbl2Addr[] = {
@@ -377,7 +378,39 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
{ X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
// AVX-512 foldable instructions
- { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
+ { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions (128-bit versions)
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -415,6 +448,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
{ X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
{ X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
+ { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
+ { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
+ { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
{ X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
@@ -493,6 +529,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
{ X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
+ { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
+ { X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
+ { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
@@ -526,6 +567,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
// AVX 256-bit foldable instructions
+ { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
+ { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
+ { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
+ { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
+ { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
@@ -533,6 +579,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
{ X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
+ { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// AVX2 foldable instructions
{ X86::VPABSBrr256, X86::VPABSBrm256, 0 },
@@ -541,13 +594,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
{ X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
{ X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
- { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
- { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, 0 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
// BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
{ X86::BEXTR32rr, X86::BEXTR32rm, 0 },
@@ -601,18 +647,46 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
- { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 },
- { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
- { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
- { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
+ { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
+ { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
+ { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
+ { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
+ { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
+ { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
+ { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
+ // AVX-512 foldable instructions (256-bit versions)
+ { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
+ { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
+ { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
+ { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
+ { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
+ { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
+ { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
+ // AVX-512 foldable instructions (128-bit versions)
+ { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
+ { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
+ { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
+ { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
+ { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
+ { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
+ { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
// AES foldable instructions
{ X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
{ X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
{ X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 },
- { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 },
+ { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 }
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -869,8 +943,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
{ X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
@@ -1543,8 +1615,11 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::VMOVAPSrm:
case X86::VMOVAPDrm:
case X86::VMOVDQArm:
+ case X86::VMOVUPSYrm:
case X86::VMOVAPSYrm:
+ case X86::VMOVUPDYrm:
case X86::VMOVAPDYrm:
+ case X86::VMOVDQUYrm:
case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
@@ -1572,8 +1647,11 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::VMOVAPSmr:
case X86::VMOVAPDmr:
case X86::VMOVDQAmr:
+ case X86::VMOVUPSYmr:
case X86::VMOVAPSYmr:
+ case X86::VMOVUPDYmr:
case X86::VMOVAPDYmr:
+ case X86::VMOVDQUYmr:
case X86::VMOVDQAYmr:
case X86::VMOVUPSZmr:
case X86::VMOVAPSZmr:
@@ -2078,34 +2156,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
- case X86::SHUFPSrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- unsigned M = MI->getOperand(3).getImm();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
- case X86::SHUFPDrri: {
- assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!Subtarget.hasSSE2()) return nullptr;
-
- unsigned B = MI->getOperand(1).getReg();
- unsigned C = MI->getOperand(2).getReg();
- if (B != C) return nullptr;
- unsigned M = MI->getOperand(3).getImm();
-
- // Convert to PSHUFD mask.
- M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;
-
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
- .addOperand(Dest).addOperand(Src).addImm(M);
- break;
- }
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
@@ -2387,6 +2437,42 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI->getOperand(3).setImm(Size-Amt);
return TargetInstrInfo::commuteInstruction(MI, NewMI);
}
+ case X86::BLENDPDrri:
+ case X86::BLENDPSrri:
+ case X86::PBLENDWrri:
+ case X86::VBLENDPDrri:
+ case X86::VBLENDPSrri:
+ case X86::VBLENDPDYrri:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDrri:
+ case X86::VPBLENDWrri:
+ case X86::VPBLENDDYrri:
+ case X86::VPBLENDWYrri:{
+ unsigned Mask;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::BLENDPDrri: Mask = 0x03; break;
+ case X86::BLENDPSrri: Mask = 0x0F; break;
+ case X86::PBLENDWrri: Mask = 0xFF; break;
+ case X86::VBLENDPDrri: Mask = 0x03; break;
+ case X86::VBLENDPSrri: Mask = 0x0F; break;
+ case X86::VBLENDPDYrri: Mask = 0x0F; break;
+ case X86::VBLENDPSYrri: Mask = 0xFF; break;
+ case X86::VPBLENDDrri: Mask = 0x0F; break;
+ case X86::VPBLENDWrri: Mask = 0xFF; break;
+ case X86::VPBLENDDYrri: Mask = 0xFF; break;
+ case X86::VPBLENDWYrri: Mask = 0xFF; break;
+ }
+ // Only the least significant bits of Imm are used.
+ unsigned Imm = MI->getOperand(3).getImm() & Mask;
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->getOperand(3).setImm(Mask ^ Imm);
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
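
Aside (not part of the patch): the blend cases above rewrite the immediate as Mask ^ Imm because each immediate bit selects which source supplies that lane, so swapping the two sources is equivalent to inverting exactly the bits the instruction uses. A minimal standalone C++ sketch of that equivalence for a 4-lane BLENDPS-style select, independent of the LLVM APIs used in the patch:

#include <cassert>
#include <cstdint>

// Scalar model of a 4-lane BLENDPS: lane i comes from b when bit i of imm is
// set, otherwise from a.
static float blend_lane(unsigned i, uint8_t imm, const float *a, const float *b) {
  return ((imm >> i) & 1) ? b[i] : a[i];
}

int main() {
  const float A[4] = {1, 2, 3, 4}, B[4] = {5, 6, 7, 8};
  const uint8_t Imm = 0x05;            // take lanes 0 and 2 from B
  const uint8_t Commuted = 0x0F ^ Imm; // invert the 4 used bits (Mask = 0x0F)
  // Swapping the sources and flipping the used mask bits selects the same values.
  for (unsigned i = 0; i != 4; ++i)
    assert(blend_lane(i, Imm, A, B) == blend_lane(i, Commuted, B, A));
  return 0;
}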
@@ -2471,6 +2557,20 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
switch (MI->getOpcode()) {
+ case X86::BLENDPDrri:
+ case X86::BLENDPSrri:
+ case X86::PBLENDWrri:
+ case X86::VBLENDPDrri:
+ case X86::VBLENDPSrri:
+ case X86::VBLENDPDYrri:
+ case X86::VBLENDPSYrri:
+ case X86::VPBLENDDrri:
+ case X86::VPBLENDDYrri:
+ case X86::VPBLENDWrri:
+ case X86::VPBLENDWYrri:
+ SrcOpIdx1 = 1;
+ SrcOpIdx2 = 2;
+ return true;
case X86::VFMADDPDr231r:
case X86::VFMADDPSr231r:
case X86::VFMADDSDr231r:
@@ -3067,6 +3167,8 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
inline static bool MaskRegClassContains(unsigned Reg) {
return X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg) ||
+ X86::VK32RegClass.contains(Reg) ||
+ X86::VK64RegClass.contains(Reg) ||
X86::VK1RegClass.contains(Reg);
}
static
@@ -3143,7 +3245,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Moving EFLAGS to / from another register requires a push and a pop.
// Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - colobbersTheStack.
+ // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
@@ -3287,9 +3389,11 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned =
- (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned = (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -3324,9 +3428,11 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned =
- (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
+ bool isAligned = (MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
@@ -3868,10 +3974,10 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
/// operand at the use. We fold the load instructions if load defines a virtual
/// register, the virtual register is used once in the same BB, and the
/// instructions in-between do not load or store, and have no side effects.
-MachineInstr* X86InstrInfo::
-optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
- unsigned &FoldAsLoadDefReg,
- MachineInstr *&DefMI) const {
+MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
+ const MachineRegisterInfo *MRI,
+ unsigned &FoldAsLoadDefReg,
+ MachineInstr *&DefMI) const {
if (FoldAsLoadDefReg == 0)
return nullptr;
// To be conservative, if there exists another load, clear the load candidate.
@@ -3887,55 +3993,35 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
if (!DefMI->isSafeToMove(this, nullptr, SawStore))
return nullptr;
- // We try to commute MI if possible.
- unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
- for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
- // Collect information about virtual register operands of MI.
- unsigned SrcOperandId = 0;
- bool FoundSrcOperand = false;
- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg != FoldAsLoadDefReg)
- continue;
- // Do not fold if we have a subreg use or a def or multiple uses.
- if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
- return nullptr;
-
- SrcOperandId = i;
- FoundSrcOperand = true;
- }
- if (!FoundSrcOperand) return nullptr;
-
- // Check whether we can fold the def into SrcOperandId.
- SmallVector<unsigned, 8> Ops;
- Ops.push_back(SrcOperandId);
- MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
- if (FoldMI) {
- FoldAsLoadDefReg = 0;
- return FoldMI;
- }
-
- if (Idx == 1) {
- // MI was changed but it didn't help, commute it back!
- commuteInstruction(MI, false);
+ // Collect information about virtual register operands of MI.
+ unsigned SrcOperandId = 0;
+ bool FoundSrcOperand = false;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != FoldAsLoadDefReg)
+ continue;
+ // Do not fold if we have a subreg use or a def or multiple uses.
+ if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
return nullptr;
- }
- // Check whether we can commute MI and enable folding.
- if (MI->isCommutable()) {
- MachineInstr *NewMI = commuteInstruction(MI, false);
- // Unable to commute.
- if (!NewMI) return nullptr;
- if (NewMI != MI) {
- // New instruction. It doesn't need to be kept.
- NewMI->eraseFromParent();
- return nullptr;
- }
- }
+ SrcOperandId = i;
+ FoundSrcOperand = true;
+ }
+ if (!FoundSrcOperand)
+ return nullptr;
+
+ // Check whether we can fold the def into SrcOperandId.
+ SmallVector<unsigned, 8> Ops;
+ Ops.push_back(SrcOperandId);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ if (FoldMI) {
+ FoldAsLoadDefReg = 0;
+ return FoldMI;
}
+
return nullptr;
}
@@ -3961,6 +4047,28 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
return true;
}
+// LoadStackGuard has so far only been implemented for 64-bit MachO. A
+// different code sequence is needed for other targets.
+static void expandLoadStackGuard(MachineInstrBuilder &MIB,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ unsigned Reg = MIB->getOperand(0).getReg();
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
+ unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->
+ getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8);
+ MachineBasicBlock::iterator I = MIB.getInstr();
+
+ BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
+ .addReg(0).addGlobalAddress(GV, 0, X86II::MO_GOTPCREL).addReg(0)
+ .addMemOperand(MMO);
+ MIB->setDebugLoc(DL);
+ MIB->setDesc(TII.get(X86::MOV64rm));
+ MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
+}
+
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
@@ -3995,6 +4103,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
case X86::KSET1B:
case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
+ case TargetOpcode::LOAD_STACK_GUARD:
+ expandLoadStackGuard(MIB, *this);
+ return true;
}
return false;
}
@@ -4070,7 +4181,8 @@ MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Align) const {
+ unsigned Size, unsigned Align,
+ bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -4138,8 +4250,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
return nullptr;
// If this is a 64-bit load, but the spill slot is 32, then we can do
- // a 32-bit load which is implicitly zero-extended. This likely is due
- // to liveintervalanalysis remat'ing a load from stack slot.
+ // a 32-bit load which is implicitly zero-extended. This likely is
+ // due to live interval analysis remat'ing a load from stack slot.
if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
return nullptr;
Opcode = X86::MOV32rm;
@@ -4158,8 +4270,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// to a 32-bit one.
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
- NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- X86::sub_32bit));
+ NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, X86::sub_32bit));
else
NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
@@ -4167,6 +4278,65 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
}
+ // If the instruction and target operand are commutable, commute the
+ // instruction and try again.
+ if (AllowCommute) {
+ unsigned OriginalOpIdx = i, CommuteOpIdx1, CommuteOpIdx2;
+ if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
+ bool HasDef = MI->getDesc().getNumDefs();
+ unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
+ unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg();
+ unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg();
+ bool Tied0 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
+ bool Tied1 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
+
+ // If either of the commutable operands are tied to the destination
+ // then we can not commute + fold.
+ if ((HasDef && Reg0 == Reg1 && Tied0) ||
+ (HasDef && Reg0 == Reg2 && Tied1))
+ return nullptr;
+
+ if ((CommuteOpIdx1 == OriginalOpIdx) ||
+ (CommuteOpIdx2 == OriginalOpIdx)) {
+ MachineInstr *CommutedMI = commuteInstruction(MI, false);
+ if (!CommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (CommutedMI != MI) {
+ // New instruction. We can't fold from this.
+ CommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Attempt to fold with the commuted version of the instruction.
+ unsigned CommuteOp =
+ (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, Size, Align,
+ /*AllowCommute=*/false);
+ if (NewMI)
+ return NewMI;
+
+ // Folding failed again - undo the commute before returning.
+ MachineInstr *UncommutedMI = commuteInstruction(MI, false);
+ if (!UncommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (UncommutedMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ UncommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Return here to prevent duplicate fuse failure report.
+ return nullptr;
+ }
+ }
+ }
+
// No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
@@ -4350,8 +4520,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
- Alignment = std::min(
- Alignment, MF.getTarget().getFrameLowering()->getStackAlignment());
+ Alignment = std::min(Alignment, MF.getTarget()
+ .getSubtargetImpl()
+ ->getFrameLowering()
+ ->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4374,7 +4546,27 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ Size, Alignment, /*AllowCommute=*/true);
+}
+
+static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
+ const MachineFunction &MF) {
+ unsigned Opc = LoadMI.getOpcode();
+ unsigned RegSize =
+ MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
+
+ if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm) && RegSize > 4)
+ // These instructions only load 32 bits, we can't fold them if the
+ // destination register is wider than 32 bits (4 bytes).
+ return true;
+
+ if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm) && RegSize > 8)
+ // These instructions only load 64 bits, we can't fold them if the
+ // destination register is wider than 64 bits (8 bytes).
+ return true;
+
+ return false;
}
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -4384,8 +4576,11 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
int FrameIndex;
- if (isLoadFromStackSlot(LoadMI, FrameIndex))
+ if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
+ if (isPartialRegisterLoad(*LoadMI, MF))
+ return nullptr;
return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+ }
// Check switch flag
if (NoFusing) return nullptr;
@@ -4496,19 +4691,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
default: {
- if ((LoadMI->getOpcode() == X86::MOVSSrm ||
- LoadMI->getOpcode() == X86::VMOVSSrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 4)
- // These instructions only load 32 bits, we can't fold them if the
- // destination register is wider than 32 bits (4 bytes).
- return nullptr;
- if ((LoadMI->getOpcode() == X86::MOVSDrm ||
- LoadMI->getOpcode() == X86::VMOVSDrm) &&
- MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
- > 8)
- // These instructions only load 64 bits, we can't fold them if the
- // destination register is wider than 64 bits (8 bytes).
+ if (isPartialRegisterLoad(*LoadMI, MF))
return nullptr;
// Folding a normal load. Just copy the load's address operands.
@@ -4517,7 +4700,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
break;
}
}
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
+ return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
+ /*Size=*/0, Alignment, /*AllowCommute=*/true);
}
@@ -5299,16 +5483,32 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+// This code must remain in sync with getJumpInstrTableEntryBound in this class!
+// In particular, getJumpInstrTableEntryBound must always return an upper bound
+// on the encoding lengths of the instructions generated by
+// getUnconditionalBranch and getTrap.
void X86InstrInfo::getUnconditionalBranch(
MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
Branch.setOpcode(X86::JMP_4);
Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
}
+// This code must remain in sync with getJumpInstrTableEntryBound in this class!
+// In particular, getJumpInstrTableEntryBound must always return an upper bound
+// on the encoding lengths of the instructions generated by
+// getUnconditionalBranch and getTrap.
void X86InstrInfo::getTrap(MCInst &MI) const {
MI.setOpcode(X86::TRAP);
}
+// See getTrap and getUnconditionalBranch for conditions on the value returned
+// by this function.
+unsigned X86InstrInfo::getJumpInstrTableEntryBound() const {
+ // 5 bytes suffice: JMP_4 Symbol@PLT uses 1 byte (E9) for the JMP_4 and 4
+ // bytes for the symbol offset. And TRAP is ud2, which is two bytes (0F 0B).
+ return 5;
+}
+
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
default: return false;
@@ -5351,10 +5551,10 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VSQRTSSm:
case X86::VSQRTSSm_Int:
case X86::VSQRTSSr:
- case X86::VSQRTPDZrm:
- case X86::VSQRTPDZrr:
- case X86::VSQRTPSZrm:
- case X86::VSQRTPSZrr:
+ case X86::VSQRTPDZm:
+ case X86::VSQRTPDZr:
+ case X86::VSQRTPSZm:
+ case X86::VSQRTPSZr:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
case X86::VSQRTSDZr:
@@ -5426,7 +5626,7 @@ namespace {
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
unsigned PC;
if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
@@ -5524,7 +5724,7 @@ namespace {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to RAX/EAX.
MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
@@ -5545,7 +5745,7 @@ namespace {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
- const X86InstrInfo *TII = TM->getInstrInfo();
+ const X86InstrInfo *TII = TM->getSubtargetImpl()->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();