Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp              | 55
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp               | 53
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td                   | 43
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp        | 83
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp  | 16
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 11
6 files changed, 111 insertions, 150 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index cca6d12..572617c 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
   return TopOfFrameOffset - FrameRegPos;
 }
 
-/// Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF) {
-  // FIXME: Make generic? Really consider after upstreaming. This code is now
-  // shared between PEI, ARM *and* here.
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
-  unsigned MaxAlign = MFI->getMaxAlignment();
-  int Offset = 0;
-
-  // This code is very, very similar to PEI::calculateFrameObjectOffsets().
-  // It really should be refactored to share code. Until then, changes
-  // should keep in mind that there's tight coupling between the two.
-
-  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -MFI->getObjectOffset(i);
-    if (FixedOff > Offset) Offset = FixedOff;
-  }
-  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
-    if (MFI->isDeadObjectIndex(i))
-      continue;
-    Offset += MFI->getObjectSize(i);
-    unsigned Align = MFI->getObjectAlignment(i);
-    // Adjust to alignment boundary
-    Offset = (Offset+Align-1)/Align*Align;
-
-    MaxAlign = std::max(Align, MaxAlign);
-  }
-
-  if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
-    Offset += MFI->getMaxCallFrameSize();
-
-  // Round up the size to a multiple of the alignment. If the function has
-  // any calls or alloca's, align to the target's StackAlignment value to
-  // ensure that the callee's frame or the alloca data is suitably aligned;
-  // otherwise, for leaf functions, align to the TransientStackAlignment
-  // value.
-  unsigned StackAlign;
-  if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
-      (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
-    StackAlign = TFI->getStackAlignment();
-  else
-    StackAlign = TFI->getTransientStackAlignment();
-
-  // If the frame pointer is eliminated, all frame offsets will be relative to
-  // SP not FP. Align to MaxAlign so this works.
-  StackAlign = std::max(StackAlign, MaxAlign);
-  unsigned AlignMask = StackAlign - 1;
-  Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
-
-  return (unsigned)Offset;
-}
-
 void
 AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                            RegScavenger *RS) const {
@@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   // callee-save register for this purpose or allocate an extra spill slot.
 
   bool BigStack =
-    (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+    (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
     || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
     || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
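The body of the deleted helper is the same frame-layout arithmetic that now lives in the generic MachineFrameInfo::estimateStackSize() the new code calls, and its two rounding idioms are worth spelling out. A minimal sketch (hypothetical standalone helpers for illustration, not LLVM API; the mask form assumes a power-of-two alignment):

    #include <cassert>
    #include <cstdint>

    // Division form, as in the per-object loop: valid for any Align >= 1.
    uint64_t alignUpDiv(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    // Mask form, as in the final rounding: needs Align to be a power of two.
    uint64_t alignUpMask(uint64_t Offset, uint64_t Align) {
      assert((Align & (Align - 1)) == 0 && "Align must be a power of two");
      uint64_t AlignMask = Align - 1;
      return (Offset + AlignMask) & ~AlignMask;
    }

Both round Offset up to the next multiple of Align; the mask form is the one used for the final StackAlign rounding, where the alignment is guaranteed to be a power of two.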
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index cea7f91..e9f4497 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -200,6 +200,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   setOperationAction(ISD::FSIN, MVT::f32, Expand);
   setOperationAction(ISD::FSIN, MVT::f64, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
 
   // Virtually no operation on f128 is legal, but LLVM can't expand them when
   // there's a valid register class, so we need custom operations in most cases.
@@ -217,6 +219,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   setOperationAction(ISD::FREM, MVT::f128, Expand);
   setOperationAction(ISD::FRINT, MVT::f128, Expand);
   setOperationAction(ISD::FSIN, MVT::f128, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
   setOperationAction(ISD::FSQRT, MVT::f128, Expand);
   setOperationAction(ISD::FSUB, MVT::f128, Custom);
   setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
@@ -341,8 +344,7 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   //   ldxr dest, ptr
   //   <binop> scratch, dest, incr
   //   stxr stxr_status, scratch, ptr
-  //   cmp stxr_status, #0
-  //   b.ne loopMBB
+  //   cbnz stxr_status, loopMBB
   // fallthrough --> exitMBB
   BB = loopMBB;
   BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -364,10 +366,8 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
 
   BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
-  BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
-    .addReg(stxr_status).addImm(0);
-  BuildMI(BB, dl, TII->get(AArch64::Bcc))
-    .addImm(A64CC::NE).addMBB(loopMBB);
+  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+    .addReg(stxr_status).addMBB(loopMBB);
 
   BB->addSuccessor(loopMBB);
   BB->addSuccessor(exitMBB);
@@ -437,8 +437,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   //   cmp incr, dest (, sign extend if necessary)
   //   csel scratch, dest, incr, cond
   //   stxr stxr_status, scratch, ptr
-  //   cmp stxr_status, #0
-  //   b.ne loopMBB
+  //   cbnz stxr_status, loopMBB
   // fallthrough --> exitMBB
   BB = loopMBB;
   BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
@@ -457,10 +456,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
 
   BuildMI(BB, dl, TII->get(strOpc), stxr_status)
     .addReg(scratch).addReg(ptr);
-  BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
-    .addReg(stxr_status).addImm(0);
-  BuildMI(BB, dl, TII->get(AArch64::Bcc))
-    .addImm(A64CC::NE).addMBB(loopMBB);
+  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+    .addReg(stxr_status).addMBB(loopMBB);
 
   BB->addSuccessor(loopMBB);
   BB->addSuccessor(exitMBB);
@@ -533,17 +530,14 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   // loop2MBB:
   //   strex stxr_status, newval, [ptr]
-  //   cmp stxr_status, #0
-  //   b.ne loop1MBB
+  //   cbnz stxr_status, loop1MBB
   BB = loop2MBB;
   unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
   MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
 
   BuildMI(BB, dl, TII->get(strOpc),
           stxr_status).addReg(newval).addReg(ptr);
-  BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp))
-    .addReg(stxr_status).addImm(0);
-  BuildMI(BB, dl, TII->get(AArch64::Bcc))
-    .addImm(A64CC::NE).addMBB(loop1MBB);
+  BuildMI(BB, dl, TII->get(AArch64::CBNZw))
+    .addReg(stxr_status).addMBB(loop1MBB);
 
   BB->addSuccessor(loop1MBB);
   BB->addSuccessor(exitMBB);
@@ -1861,11 +1855,10 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
   const GlobalValue *GV = GN->getGlobal();
   unsigned Alignment = GV->getAlignment();
   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
-
-  if (GV->isWeakForLinker() && RelocM == Reloc::Static) {
-    // Weak symbols can't use ADRP/ADD pair since they should evaluate to
-    // zero when undefined. In PIC mode the GOT can take care of this, but in
-    // absolute mode we use a constant pool load.
+  if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+    // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
+    // to zero when they remain undefined. In PIC mode the GOT can take care of
+    // this, but in absolute mode we use a constant pool load.
     SDValue PoolAddr;
     PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
@@ -1873,10 +1866,16 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
                                                      AArch64II::MO_LO12),
                            DAG.getConstant(8, MVT::i32));
-    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
-                       MachinePointerInfo::getConstantPool(),
-                       /*isVolatile=*/ false, /*isNonTemporal=*/ true,
-                       /*isInvariant=*/ true, 8);
+    SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+                                     MachinePointerInfo::getConstantPool(),
+                                     /*isVolatile=*/ false,
+                                     /*isNonTemporal=*/ true,
+                                     /*isInvariant=*/ true, 8);
+
+    if (GN->getOffset() != 0)
+      return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+                         DAG.getConstant(GN->getOffset(), PtrVT));
+
+    return GlobalAddr;
   }
 
   if (Alignment == 0) {
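The cmp/b.ne to cbnz rewrite in the three expansions above works because STXR writes 0 to its status register on success and 1 on failure, so "retry while non-zero" is exactly the test CBNZ performs and the separate compare (with its NZCV clobber) can go. A rough C-level sketch of the loop shape the custom inserter builds for ATOMIC_LOAD_ADD, written with Clang's __builtin_arm_ldrex/__builtin_arm_strex exclusive-access builtins (an illustration of the pattern under that assumption, not the code the backend actually emits):

    // Compile for an AArch64 target, where the builtins map to LDXR/STXR.
    int atomic_fetch_add_sketch(int *Ptr, int Incr) {
      int Old, Status;
      do {
        Old = __builtin_arm_ldrex(Ptr);                // ldxr  dest, [ptr]
        Status = __builtin_arm_strex(Old + Incr, Ptr); // stxr  status, scratch, [ptr]
      } while (Status != 0);                           // cbnz  status, loopMBB
      return Old;
    }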
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 562a7f6..319ec97 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -159,7 +159,7 @@ let Defs = [XSP], Uses = [XSP] in {
 // Atomic operation pseudo-instructions
 //===----------------------------------------------------------------------===//
 
-let usesCustomInserter = 1, Defs = [NZCV] in {
+let usesCustomInserter = 1 in {
 multiclass AtomicSizes<string opname> {
   def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
             [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>;
@@ -178,11 +178,14 @@ defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
 defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
 defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
 defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
-defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
-defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
 defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
+let Defs = [NZCV] in {
+  // These operations need a CMP to calculate the correct value
+  defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
+  defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
+  defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
+  defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+}
 
 let usesCustomInserter = 1, Defs = [NZCV] in {
 def ATOMIC_CMP_SWAP_I8
@@ -1942,43 +1945,41 @@ def fpz32 : Operand<f32>,
             ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
   let ParserMatchClass = fpzero_asmoperand;
   let PrintMethod = "printFPZeroOperand";
+  let DecoderMethod = "DecodeFPZeroOperand";
 }
 
 def fpz64 : Operand<f64>,
             ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
   let ParserMatchClass = fpzero_asmoperand;
   let PrintMethod = "printFPZeroOperand";
+  let DecoderMethod = "DecodeFPZeroOperand";
 }
 
-multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2,
-                            dag pattern> {
+multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
   def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
-                          (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2),
-                          [pattern], NoItinerary> {
+                          (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
+                          NoItinerary> {
     let Defs = [NZCV];
   }
 
   def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
-                        (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2),
-                        [], NoItinerary> {
+                        (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> {
     let Defs = [NZCV];
   }
 }
 
-defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm",
+defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
                                (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>;
-defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm",
+defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
                                (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>;
 
-// What would be Rm should be written as 0, but anything is valid for
-// disassembly so we can't set the bits
-let PostEncoderMethod = "fixFCMPImm" in {
-  defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm",
-                                 (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>;
+// What would be Rm should be written as 0; note that even though it's called
+// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
+                               (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Rm))>;
 
-  defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm",
-                                 (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>;
-}
+defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
+                               (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Rm))>;
 
 //===----------------------------------------------------------------------===//
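Two things change in this file: the FCMP zero-operand forms in the hunk just above, and, in the atomic hunk before it, the NZCV clobber moving off the plain binops. Only the min/max flavours keep Defs = [NZCV], because their expansion contains a real compare (cmp + csel) to choose between the loaded value and the operand; with the loop back-edge now a CBNZ, the other operations never touch the flags. In the same hedged builtin style as the earlier sketch:

    // Sketch of the min expansion's shape; the select is the NZCV user.
    int atomic_fetch_min_sketch(int *Ptr, int Incr) {
      int Old, Status;
      do {
        Old = __builtin_arm_ldrex(Ptr);          // ldxr dest, [ptr]
        int Scratch = Old < Incr ? Old : Incr;   // cmp + csel on AArch64
        Status = __builtin_arm_strex(Scratch, Ptr);
      } while (Status != 0);                     // cbnz status, loopMBB
      return Old;
    }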
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c1695da..69bb80a 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -160,44 +160,53 @@ private:
   SMLoc StartLoc, EndLoc;
 
+  struct ImmWithLSLOp {
+    const MCExpr *Val;
+    unsigned ShiftAmount;
+    bool ImplicitAmount;
+  };
+
+  struct CondCodeOp {
+    A64CC::CondCodes Code;
+  };
+
+  struct FPImmOp {
+    double Val;
+  };
+
+  struct ImmOp {
+    const MCExpr *Val;
+  };
+
+  struct RegOp {
+    unsigned RegNum;
+  };
+
+  struct ShiftExtendOp {
+    A64SE::ShiftExtSpecifiers ShiftType;
+    unsigned Amount;
+    bool ImplicitAmount;
+  };
+
+  struct SysRegOp {
+    const char *Data;
+    unsigned Length;
+  };
+
+  struct TokOp {
+    const char *Data;
+    unsigned Length;
+  };
+
   union {
-    struct {
-      const MCExpr *Val;
-      unsigned ShiftAmount;
-      bool ImplicitAmount;
-    } ImmWithLSL;
-
-    struct {
-      A64CC::CondCodes Code;
-    } CondCode;
-
-    struct {
-      double Val;
-    } FPImm;
-
-    struct {
-      const MCExpr *Val;
-    } Imm;
-
-    struct {
-      unsigned RegNum;
-    } Reg;
-
-    struct {
-      A64SE::ShiftExtSpecifiers ShiftType;
-      unsigned Amount;
-      bool ImplicitAmount;
-    } ShiftExtend;
-
-    struct {
-      const char *Data;
-      unsigned Length;
-    } SysReg;
-
-    struct {
-      const char *Data;
-      unsigned Length;
-    } Tok;
+    struct ImmWithLSLOp ImmWithLSL;
+    struct CondCodeOp CondCode;
+    struct FPImmOp FPImm;
+    struct ImmOp Imm;
+    struct RegOp Reg;
+    struct ShiftExtendOp ShiftExtend;
+    struct SysRegOp SysReg;
+    struct TokOp Tok;
   };
 
   AArch64Operand(KindTy K, SMLoc S, SMLoc E)
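Hoisting the operand structs out of the union and naming them leaves the layout and accessors untouched; each union arm simply gets a named type instead of being defined anonymously in place. A stripped-down sketch of the same tagged-union pattern (a hypothetical Operand type for illustration, not the parser's real interface):

    #include <cassert>

    struct Operand {
      enum KindTy { k_Immediate, k_Register } Kind;

      struct ImmOp { long Val; };
      struct RegOp { unsigned RegNum; };

      union {        // only the arm selected by Kind is meaningful
        ImmOp Imm;
        RegOp Reg;
      };

      unsigned getReg() const {
        assert(Kind == k_Register && "accessor used on wrong operand kind");
        return Reg.RegNum;
      }
    };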
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index eba7666..12c1b8f 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -106,6 +106,11 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
                                                uint64_t Address,
                                                const void *Decoder);
 
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+                                        unsigned RmBits,
+                                        uint64_t Address,
+                                        const void *Decoder);
+
 template<int RegWidth>
 static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
                                              unsigned FullImm,
@@ -381,6 +386,17 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
+                                        unsigned RmBits,
+                                        uint64_t Address,
+                                        const void *Decoder) {
+  // Any bits are valid in the instruction (they're architecturally ignored),
+  // but a code generator should insert 0.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  return MCDisassembler::Success;
+}
+
+
 template<int RegWidth>
 static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 756e037..a5c591e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -106,8 +106,6 @@ public:
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups) const;
 
-  unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const;
-
   template<int hasRs, int hasRt2>
   unsigned fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const;
 
@@ -423,15 +421,6 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
   return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups);
 }
 
-unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI,
-                                          unsigned EncodedValue) const {
-  // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation
-  // with 0s, but is architecturally ignored
-  EncodedValue &= ~0x1f0000u;
-
-  return EncodedValue;
-}
-
 template<int hasRs, int hasRt2>
 unsigned AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
                                                      unsigned EncodedValue) const {
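Taken together, the last two files replace an encode-side fixup with a decode-side normalisation: the fpz32/fpz64 operand now encodes as zeros in the Rm field (bits 20-16) without help, and DecodeFPZeroOperand accepts whatever the field holds, so the ad-hoc fixFCMPImm pass is dead. A small sketch of that field arithmetic (hypothetical helpers; the mask constant is the one the removed fixFCMPImm used):

    #include <cstdint>

    // Rm occupies bits [20:16] of the FCMP[E] encoding.
    const uint32_t RmFieldMask = 0x1f0000u;

    // Encode side: the #0.0 operand contributes all-zero Rm bits.
    uint32_t clearRmField(uint32_t Encoding) { return Encoding & ~RmFieldMask; }

    // Decode side: the field is architecturally ignored, so normalise to 0.
    uint32_t decodeRmAsZero(uint32_t /*Encoding*/) { return 0; }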