author     Amara Emerson <amara.emerson@arm.com>   2013-09-26 12:22:36 +0000
committer  Amara Emerson <amara.emerson@arm.com>   2013-09-26 12:22:36 +0000
commit     268c743a3ba44ada364938bc5ff9b1be219df54f
tree       b0799bffe0832dde28ca1d63bf033ec1c22101f2
parent     9637da60835a60f8ccd8289d04c60b2dcd4b9b5a
[ARM] Use the load-acquire/store-release instructions optimally in AArch32.
Patch by Artyom Skrobov.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191428 91177308-0d34-0410-b5e6-96231b3b80d8
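
As context for the diff below, this is the transformation the patch enables, shown as a minimal C++ sketch (illustrative only, not code from the patch): for C++11 atomics, the pre-v8 lowering brackets ordinary loads and stores with dmb barriers, whereas ARMv8 AArch32 has dedicated load-acquire/store-release instructions.

```cpp
// Illustrative sketch (not from the patch): source-level atomics whose
// AArch32 lowering this commit improves.
#include <atomic>

std::atomic<int> flag(0);

int consume() {
  // Acquire load: previously ldr + dmb ish; on v8, a single lda.
  return flag.load(std::memory_order_acquire);
}

void publish(int v) {
  // Release store: previously dmb ish + str; on v8, a single stl.
  flag.store(v, std::memory_order_release);
}
```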
-rw-r--r--  include/llvm/CodeGen/ISDOpcodes.h                  |    6
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h                |    7
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h           |   21
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp          |   74
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp                 |  156
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp                 |  283
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h                   |   18
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                     |  332
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td                   |    9
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp    |    2
-rw-r--r--  test/CodeGen/ARM/atomic-64bit.ll                   |   16
-rw-r--r--  test/CodeGen/ARM/atomic-ops-v8.ll                  | 1344
12 files changed, 1896 insertions(+), 372 deletions(-)
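
The core selection rule appears in getExclusiveOperation() in the ARMISelLowering.cpp hunks below: the AtomicOrdering is now threaded through to the pseudo-instruction expander, which picks acquire/release-flavoured exclusives only when the ordering requires them. A minimal sketch of that predicate, paraphrased from the patch:

```cpp
// Paraphrase of the rule implemented by getExclusiveOperation() below.
// The enum mirrors LLVM's AtomicOrdering values as used in the patch.
enum AtomicOrdering { NotAtomic, Unordered, Monotonic, Acquire, Release,
                      AcquireRelease, SequentiallyConsistent };

// ldaex{b,h,,d} is required when the ordering has acquire semantics;
// otherwise a plain ldrex{b,h,,d} suffices.
bool loadNeedsAcquire(AtomicOrdering Ord) {
  return Ord == Acquire || Ord == AcquireRelease ||
         Ord == SequentiallyConsistent;
}

// Symmetrically, stlex{b,h,,d} is required for release semantics;
// otherwise a plain strex{b,h,,d}.
bool storeNeedsRelease(AtomicOrdering Ord) {
  return Ord == Release || Ord == AcquireRelease ||
         Ord == SequentiallyConsistent;
}
```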
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 45bb7e3..3a49dd8 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -604,11 +604,17 @@ namespace ISD {
     ATOMIC_STORE,
 
     /// Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
+    /// For double-word atomic operations:
+    /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmpLo, cmpHi,
+    ///                                          swapLo, swapHi)
     /// This corresponds to the cmpxchg instruction.
     ATOMIC_CMP_SWAP,
 
     /// Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt)
     /// Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt)
+    /// For double-word atomic operations:
+    /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi)
+    /// ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi)
     /// These correspond to the atomicrmw instruction.
     ATOMIC_SWAP,
     ATOMIC_LOAD_ADD,
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 79e533e..70920d1 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -677,6 +677,13 @@ public:
                     AtomicOrdering Ordering,
                     SynchronizationScope SynchScope);
 
+  /// getAtomic - Gets a node for an atomic op, produces result and chain and
+  /// takes N operands.
+  SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTList,
+                    SDValue* Ops, unsigned NumOps, MachineMemOperand *MMO,
+                    AtomicOrdering Ordering,
+                    SynchronizationScope SynchScope);
+
   /// getMemIntrinsicNode - Creates a MemIntrinsicNode that may produce a
   /// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
   /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index dc9bfbc..4166340 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1073,6 +1073,7 @@ public:
 ///
 class AtomicSDNode : public MemSDNode {
   SDUse Ops[4];
+  SDUse* DynOps;
 
   void InitAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope) {
     // This must match encodeMemSDNodeFlags() in SelectionDAG.cpp.
@@ -1100,7 +1101,7 @@ public:
                SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp,
                MachineMemOperand *MMO, AtomicOrdering Ordering,
                SynchronizationScope SynchScope)
-    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO), DynOps(NULL) {
     InitAtomic(Ordering, SynchScope);
     InitOperands(Ops, Chain, Ptr, Cmp, Swp);
   }
@@ -1109,7 +1110,7 @@ public:
                SDValue Chain, SDValue Ptr, SDValue Val,
                MachineMemOperand *MMO, AtomicOrdering Ordering,
                SynchronizationScope SynchScope)
-    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO), DynOps(NULL) {
     InitAtomic(Ordering, SynchScope);
     InitOperands(Ops, Chain, Ptr, Val);
   }
@@ -1118,10 +1119,22 @@ public:
                SDValue Chain, SDValue Ptr,
                MachineMemOperand *MMO, AtomicOrdering Ordering,
                SynchronizationScope SynchScope)
-    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO), DynOps(NULL) {
     InitAtomic(Ordering, SynchScope);
     InitOperands(Ops, Chain, Ptr);
   }
+  AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL, EVT MemVT,
+               SDValue* AllOps, unsigned NumOps,
+               MachineMemOperand *MMO,
+               AtomicOrdering Ordering, SynchronizationScope SynchScope)
+    : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+    DynOps = new SDUse[NumOps];
+    InitAtomic(Ordering, SynchScope);
+    InitOperands(DynOps, AllOps, NumOps);
+  }
+  ~AtomicSDNode() {
+    delete[] DynOps;
+  }
 
   const SDValue &getBasePtr() const { return getOperand(1); }
   const SDValue &getVal() const { return getOperand(2); }
@@ -1852,7 +1865,7 @@ template <> struct GraphTraits<SDNode*> {
 
 /// LargestSDNode - The largest SDNode class.
 ///
-typedef LoadSDNode LargestSDNode;
+typedef AtomicSDNode LargestSDNode;
 
 /// MostAlignedSDNode - The SDNode class with the greatest alignment
 /// requirement.
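
The AtomicSDNode changes above supplement the fixed four-slot operand array with an optional heap-allocated SDUse array, so double-word atomics can carry split lo/hi operands. A simplified, self-contained illustration of that storage pattern (hypothetical names, not LLVM API; copy control omitted for brevity):

```cpp
#include <cstddef>

// Hypothetical stand-in for SDUse, to keep the sketch self-contained.
struct Operand { void *Val; };

// Small-buffer operand storage: the inline array covers the common case
// (up to 4 operands); wider nodes, e.g. a double-word cmpxchg carrying
// lo/hi halves, fall back to a heap allocation freed by the destructor.
class NodeOperands {
  static const size_t InlineSlots = 4;
  Operand Inline[InlineSlots];
  Operand *Dyn;   // NULL unless the inline buffer was too small
  size_t Count;

public:
  explicit NodeOperands(size_t N) : Dyn(NULL), Count(N) {
    if (N > InlineSlots)
      Dyn = new Operand[N];
  }
  ~NodeOperands() { delete[] Dyn; }

  Operand *data() { return Dyn ? Dyn : Inline; }
  size_t size() const { return Count; }
};
```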
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3ca1b4f..25f6aac 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4107,6 +4107,29 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue* Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, MemVT, + Ops, NumOps, MMO, Ordering, + SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -4146,23 +4169,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 4); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTs, MemVT, - Chain, Ptr, Cmp, Swp, MMO, - Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4220,23 +4228,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? 
getVTList(MVT::Other) : getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTs, MemVT, - Chain, Ptr, Val, MMO, - Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4279,23 +4272,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast<AtomicSDNode>(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTs, MemVT, - Chain, Ptr, MMO, Ordering, - SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index f6b3827..87d1522 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -253,7 +253,7 @@ private: SDNode *SelectConcatVector(SDNode *N); - SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. @@ -2361,23 +2361,36 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } -SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { +SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16,unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. 
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other); + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) { + Op = Op64; + VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other); + } else + llvm_unreachable("Unexpected atomic operation"); + SmallVector<SDValue, 6> Ops; - Ops.push_back(Node->getOperand(1)); // Ptr - Ops.push_back(Node->getOperand(2)); // Low part of Val1 - Ops.push_back(Node->getOperand(3)); // High part of Val1 - if (Opc == ARM::ATOMCMPXCHG6432) { - Ops.push_back(Node->getOperand(4)); // Low part of Val2 - Ops.push_back(Node->getOperand(5)); // High part of Val2 - } - Ops.push_back(Node->getOperand(0)); // Chain - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); - SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), - MVT::i32, MVT::i32, MVT::Other, - Ops); - cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); - return ResNode; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size()); } SDNode *ARMDAGToDAGISel::Select(SDNode *N) { @@ -3251,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); - case ARMISD::ATOMOR64_DAG: - return SelectAtomic64(N, ARM::ATOMOR6432); - case ARMISD::ATOMXOR64_DAG: - return SelectAtomic64(N, ARM::ATOMXOR6432); - case ARMISD::ATOMADD64_DAG: - return SelectAtomic64(N, ARM::ATOMADD6432); - case ARMISD::ATOMSUB64_DAG: - return SelectAtomic64(N, ARM::ATOMSUB6432); - case ARMISD::ATOMNAND64_DAG: - return SelectAtomic64(N, ARM::ATOMNAND6432); - case ARMISD::ATOMAND64_DAG: - return SelectAtomic64(N, ARM::ATOMAND6432); - case ARMISD::ATOMSWAP64_DAG: - return SelectAtomic64(N, ARM::ATOMSWAP6432); - case ARMISD::ATOMCMPXCHG64_DAG: - return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); - - case ARMISD::ATOMMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMMIN6432); - case ARMISD::ATOMUMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMUMIN6432); - case ARMISD::ATOMMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMMAX6432); - case ARMISD::ATOMUMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMUMAX6432); + case ISD::ATOMIC_LOAD: + if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64); + else + break; + + case ISD::ATOMIC_STORE: + if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64); + else + break; + + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_ADD_I8, + ARM::ATOMIC_LOAD_ADD_I16, + ARM::ATOMIC_LOAD_ADD_I32, + ARM::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_SUB_I8, + ARM::ATOMIC_LOAD_SUB_I16, + ARM::ATOMIC_LOAD_SUB_I32, + ARM::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_AND_I8, + ARM::ATOMIC_LOAD_AND_I16, + ARM::ATOMIC_LOAD_AND_I32, + ARM::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_OR_I8, + ARM::ATOMIC_LOAD_OR_I16, + ARM::ATOMIC_LOAD_OR_I32, + ARM::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return 
SelectAtomic(N, + ARM::ATOMIC_LOAD_XOR_I8, + ARM::ATOMIC_LOAD_XOR_I16, + ARM::ATOMIC_LOAD_XOR_I32, + ARM::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_NAND_I8, + ARM::ATOMIC_LOAD_NAND_I16, + ARM::ATOMIC_LOAD_NAND_I32, + ARM::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MIN_I8, + ARM::ATOMIC_LOAD_MIN_I16, + ARM::ATOMIC_LOAD_MIN_I32, + ARM::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MAX_I8, + ARM::ATOMIC_LOAD_MAX_I16, + ARM::ATOMIC_LOAD_MAX_I32, + ARM::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMIN_I8, + ARM::ATOMIC_LOAD_UMIN_I16, + ARM::ATOMIC_LOAD_UMIN_I32, + ARM::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMAX_I8, + ARM::ATOMIC_LOAD_UMAX_I16, + ARM::ATOMIC_LOAD_UMAX_I32, + ARM::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_SWAP_I8, + ARM::ATOMIC_SWAP_I16, + ARM::ATOMIC_SWAP_I32, + ARM::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_CMP_SWAP_I8, + ARM::ATOMIC_CMP_SWAP_I16, + ARM::ATOMIC_CMP_SWAP_I32, + ARM::ATOMIC_CMP_SWAP_I64); } return SelectCode(N); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 773b710..96942ec 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -769,8 +769,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. - setInsertFencesForAtomic(true); + // On v8, we have particularly efficient implementations of atomic fences + // if they can be combined with nearby atomic loads and stores. + if (!Subtarget->hasV8Ops()) { + // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + setInsertFencesForAtomic(true); + } + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); + //setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); } else { // Set them all for expansion, which will force libcalls. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); @@ -909,6 +915,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ? 
1 : 2); } +static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, + bool isThumb2, unsigned &LdrOpc, + unsigned &StrOpc) { + static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB}, + {ARM::LDREXH, ARM::t2LDREXH}, + {ARM::LDREX, ARM::t2LDREX}, + {ARM::LDREXD, ARM::t2LDREXD}}; + static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB}, + {ARM::LDAEXH, ARM::t2LDAEXH}, + {ARM::LDAEX, ARM::t2LDAEX}, + {ARM::LDAEXD, ARM::t2LDAEXD}}; + static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB}, + {ARM::STREXH, ARM::t2STREXH}, + {ARM::STREX, ARM::t2STREX}, + {ARM::STREXD, ARM::t2STREXD}}; + static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB}, + {ARM::STLEXH, ARM::t2STLEXH}, + {ARM::STLEX, ARM::t2STLEX}, + {ARM::STLEXD, ARM::t2STLEXD}}; + + const unsigned (*LoadOps)[2], (*StoreOps)[2]; + if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) + LoadOps = LoadAcqs; + else + LoadOps = LoadBares; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + StoreOps = StoreRels; + else + StoreOps = StoreBares; + + assert(isPowerOf2_32(Size) && Size <= 8 && + "unsupported size for atomic binary op!"); + + LdrOpc = LoadOps[Log2_32(Size)][isThumb2]; + StrOpc = StoreOps[Log2_32(Size)][isThumb2]; +} + // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, @@ -1094,19 +1138,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; - - case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG"; - case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG"; - case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG"; - case ARMISD::ATOMXOR64_DAG: return "ATOMXOR64_DAG"; - case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG"; - case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG"; - case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG"; - case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG"; - case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG"; - case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG"; - case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG"; - case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG"; } } @@ -5922,32 +5953,28 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { static void ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, - SelectionDAG &DAG, unsigned NewOp) { + SelectionDAG &DAG) { SDLoc dl(Node); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); + AtomicSDNode *AN = cast<AtomicSDNode>(Node); SmallVector<SDValue, 6> Ops; Ops.push_back(Node->getOperand(0)); // Chain Ops.push_back(Node->getOperand(1)); // Ptr - // Low part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(0))); - // High part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(1))); - if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { - // High part of Val1 + for(unsigned i=2; i<Node->getNumOperands(); i++) { + // Low part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(0))); - // High part of Val2 + Node->getOperand(i), 
DAG.getIntPtrConstant(0))); + // High part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(1))); + Node->getOperand(i), DAG.getIntPtrConstant(1))); } SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, - cast<MemSDNode>(Node)->getMemOperand()); + DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), + cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(), + AN->getSynchScope()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); Results.push_back(Result.getValue(2)); @@ -6073,41 +6100,21 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; + case ISD::ATOMIC_STORE: + case ISD::ATOMIC_LOAD: case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); - return; case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); - return; case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); - return; case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); - return; case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); - return; case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); - return; case ISD::ATOMIC_SWAP: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); - return; case ISD::ATOMIC_CMP_SWAP: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); - return; case ISD::ATOMIC_LOAD_MIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); - return; case ISD::ATOMIC_LOAD_UMIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); - return; case ISD::ATOMIC_LOAD_MAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); - return; case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); + ReplaceATOMIC_OP_64(N, Results, DAG); return; } if (Res.getNode()) @@ -6127,6 +6134,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned oldval = MI->getOperand(2).getReg(); unsigned newval = MI->getOperand(3).getReg(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6142,21 +6150,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -6236,6 +6230,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6243,24 +6238,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -6344,6 +6326,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); unsigned oldval = dest; + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6351,24 +6334,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc, extendOpc; + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; extendOpc = 0; break; } @@ -6412,7 +6391,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // Sign extend the value, if necessary. if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); + oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRnopcRegClass); + if (!isThumb2) + MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) .addReg(dest) .addImm(0)); @@ -6450,7 +6432,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, unsigned Op1, unsigned Op2, bool NeedsCarry, bool IsCmpxchg, bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP, indicated by Op1==0. 
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -6458,11 +6440,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction::iterator It = BB; ++It; + bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); + unsigned offset = (isStore ? -2 : 0); unsigned destlo = MI->getOperand(0).getReg(); unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - unsigned vallo = MI->getOperand(3).getReg(); - unsigned valhi = MI->getOperand(4).getReg(); + unsigned ptr = MI->getOperand(offset+2).getReg(); + unsigned vallo = MI->getOperand(offset+3).getReg(); + unsigned valhi = MI->getOperand(offset+4).getReg(); + unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6475,6 +6461,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; if (IsCmpxchg || IsMinMax) @@ -6514,21 +6503,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // fallthrough --> exitMBB BB = loopMBB; - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); + if (!isStore) { + // Load + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } } unsigned StoreLo, StoreHi; @@ -6582,7 +6573,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, if (isThumb2) { MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); } else { // Marshal a pair... 
@@ -6600,7 +6591,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addImm(ARM::gsub_1); // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StorePair).addReg(ptr)); } // Cmp+jump @@ -6621,6 +6612,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + unsigned destlo = MI->getOperand(0).getReg(); + unsigned desthi = MI->getOperand(1).getReg(); + unsigned ptr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + if (isThumb2) { + MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); + MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); + MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); + + if (isThumb2) { + MIB.addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr); + + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); + + // Copy GPRPair0 into dest. (This copy will normally be coalesced.) + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } + AddDefaultPred(MIB); + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: @@ -7594,46 +7630,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + case ARM::ATOMIC_LOAD_I64: + return EmitAtomicLoad64(MI, BB); - case ARM::ATOMADD6432: + case ARM::ATOMIC_LOAD_ADD_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, /*NeedsCarry*/ true); - case ARM::ATOMSUB6432: + case ARM::ATOMIC_LOAD_SUB_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true); - case ARM::ATOMOR6432: + case ARM::ATOMIC_LOAD_OR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMXOR6432: + case ARM::ATOMIC_LOAD_XOR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMAND6432: + case ARM::ATOMIC_LOAD_AND_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMSWAP6432: + case ARM::ATOMIC_STORE_I64: + case ARM::ATOMIC_SWAP_I64: return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMCMPXCHG6432: + case ARM::ATOMIC_CMP_SWAP_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMMIN6432: + case ARM::ATOMIC_LOAD_MIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMMAX6432: + case ARM::ATOMIC_LOAD_MAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMUMIN6432: + case ARM::ATOMIC_LOAD_UMIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMUMAX6432: + case ARM::ATOMIC_LOAD_UMAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index fca9e0e..6131a26 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -223,21 +223,7 @@ namespace llvm { VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD, - - // 64-bit atomic ops (value split into two registers) - ATOMADD64_DAG, - ATOMSUB64_DAG, - ATOMOR64_DAG, - ATOMXOR64_DAG, - ATOMAND64_DAG, - ATOMNAND64_DAG, - ATOMSWAP64_DAG, - ATOMCMPXCHG64_DAG, - ATOMMIN64_DAG, - ATOMUMIN64_DAG, - ATOMMAX64_DAG, - ATOMUMAX64_DAG + VST4LN_UPD }; } @@ -574,6 +560,8 @@ namespace llvm { unsigned Size, bool signExtend, ARMCC::CondCodes Cond) const; + MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI, + MachineBasicBlock *BB) const; void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 80226ac..59d2f7a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1677,48 +1677,6 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. -// (These pseudos use a hand-written selection code). 
-let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { -def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, - GPR:$set1, GPR:$set2), - NoItinerary, []>; -def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -} - def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary, "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { bits<3> imm; @@ -4329,124 +4287,219 @@ def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } +let usesCustomInserter = 1, Defs = [CPSR] in { + // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in -def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; + def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; -let usesCustomInserter = 1 in { - let Defs = [CPSR] in { +// Atomic pseudo-insts which will be lowered to ldrex/strex loops. +// (64-bit pseudos use a hand-written selection code). 
+ let mayLoad = 1, mayStore = 1 in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I8 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I8 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), 
NoItinerary, - [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I16 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I16 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins 
GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>; - - def ATOMIC_SWAP_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>; - def ATOMIC_SWAP_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_SWAP_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>; - - def ATOMIC_CMP_SWAP_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>; - def ATOMIC_CMP_SWAP_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_CMP_SWAP_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>; -} + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_ADD_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_SUB_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_AND_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_OR_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_XOR_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_NAND_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_MIN_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_MAX_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_UMIN_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_UMAX_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, 
GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, + GPR:$set1, GPR:$set2, i32imm:$ordering), + NoItinerary, []>; + } + let mayLoad = 1 in + def ATOMIC_LOAD_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, i32imm:$ordering), + NoItinerary, []>; + let mayStore = 1 in + def ATOMIC_STORE_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; } let usesCustomInserter = 1 in { @@ -4560,6 +4613,35 @@ def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), (STREXH GPR:$Rt, addr_offset_none:$addr)>; +class acquiring_load<PatFrag base> + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Acquire || Ordering == SequentiallyConsistent; +}]>; + +def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; +def atomic_load_acquire_16 : acquiring_load<atomic_load_16>; +def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; + +class releasing_store<PatFrag base> + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); + return Ordering == Release || Ordering == SequentiallyConsistent; +}]>; + +def atomic_store_release_8 : releasing_store<atomic_store_8>; +def atomic_store_release_16 : releasing_store<atomic_store_16>; +def atomic_store_release_32 : releasing_store<atomic_store_32>; + +let AddedComplexity = 8 in { + def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; +} + // SWP/SWPB are deprecated in V6/V7. 
let mayLoad = 1, mayStore = 1 in { def SWP : AIswp<0, (outs GPRnopc:$Rt), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index fcc8f86..07baf2d 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4216,6 +4216,15 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val), def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val), (t2STRs GPR:$val, t2addrmode_so_reg:$addr)>; +let AddedComplexity = 8 in { + def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>; + def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>; + def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>; + def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>; +} + //===----------------------------------------------------------------------===// // Assembler aliases diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 64001b4..94069cd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -94,7 +94,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { unsigned SubVer = TT[Idx]; if (SubVer == '8') { // FIXME: Parse v8 features - ARMArchFeature = "+v8"; + ARMArchFeature = "+v8,+db"; } else if (SubVer == '7') { if (Len >= Idx+2 && TT[Idx+1] == 'm') { isThumb = true; diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll index 06a4df9..0477d4f 100644 --- a/test/CodeGen/ARM/atomic-64bit.ll +++ b/test/CodeGen/ARM/atomic-64bit.ll @@ -175,28 +175,14 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ret i64 %r } -; Compiles down to cmpxchg -; FIXME: Should compile to a single ldrexd +; Compiles down to a single ldrexd define i64 @test8(i64* %ptr) { ; CHECK-LABEL: test8: ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK: cmp [[REG1]] -; CHECK: cmpeq [[REG2]] -; CHECK: bne -; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} -; CHECK: cmp -; CHECK: bne ; CHECK: dmb {{ish$}} ; CHECK-THUMB-LABEL: test8: ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] -; CHECK-THUMB: cmp [[REG1]] -; CHECK-THUMB: it eq -; CHECK-THUMB: cmpeq [[REG2]] -; CHECK-THUMB: bne -; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} -; CHECK-THUMB: cmp -; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} %r = load atomic i64* %ptr seq_cst, align 8 diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll new file mode 100644 index 0000000..3f93929 --- /dev/null +++ b/test/CodeGen/ARM/atomic-ops-v8.ll @@ -0,0 +1,1344 @@ +; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i8: + %old = atomicrmw add i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; 
CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i16: + %old = atomicrmw add i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i32: + %old = atomicrmw add i32* @var32, i32 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_add_i64: + %old = atomicrmw add i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i8: + %old = atomicrmw sub i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i16: + %old = atomicrmw sub i16* @var16, i16 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i32: + %old = atomicrmw sub i32* @var32, i32 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_sub_i64: + %old = atomicrmw sub i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i8: + %old = atomicrmw and i8* @var8, i8 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
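+ ; For release ordering only the store side is annotated: the stlexb below
+ ; carries the release semantics, while the load above stays a plain ldrexb.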
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i16: + %old = atomicrmw and i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i32: + %old = atomicrmw and i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_and_i64: + %old = atomicrmw and i64* @var64, i64 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i8: + %old = atomicrmw or i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
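+ ; seq_cst needs both halves: the ldaexb above gives acquire semantics and
+ ; the stlexb below gives release semantics, with no separate barrier.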
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i16: + %old = atomicrmw or i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i32: + %old = atomicrmw or i32* @var32, i32 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_or_i64: + %old = atomicrmw or i64* @var64, i64 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i8: + %old = atomicrmw xor i8* @var8, i8 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
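+ ; Acquire ordering is carried entirely by the ldaexb above, so a plain
+ ; strexb suffices on the store side below.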
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i16: + %old = atomicrmw xor i16* @var16, i16 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i32: + %old = atomicrmw xor i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0 +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xor_i64: + %old = atomicrmw xor i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1 +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i8: + %old = atomicrmw xchg i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
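+ ; xchg needs no ALU op inside the loop: the incoming value in r0 is stored
+ ; back directly by the strexb below.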
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i16: + %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i32: + %old = atomicrmw xchg i32* @var32, i32 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_xchg_i64: + %old = atomicrmw xchg i64* @var64, i64 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i8: + %old = atomicrmw min i8* @var8, i8 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
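+ ; Signed min must sign-extend the loaded byte (sxtb above) before the
+ ; compare; the movge below then keeps whichever value is smaller.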
+; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it ge +; CHECK: movge r[[OLDX]], r0 +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i16: + %old = atomicrmw min i16* @var16, i16 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it ge +; CHECK: movge r[[OLDX]], r0 +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i32: + %old = atomicrmw min i32* @var32, i32 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lt +; CHECK: movlt r[[NEW]], r[[OLD]] +; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_min_i64: + %old = atomicrmw min i64* @var64, i64 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: blt .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i8: + %old = atomicrmw max i8* @var8, i8 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
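+ ; Signed max mirrors min: after the sign extension above, the movle below
+ ; selects the larger of the old value and the operand.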
+; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it le +; CHECK: movle r[[OLDX]], r0 +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i16: + %old = atomicrmw max i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] +; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLDX]], r0 +; Thumb mode: it le +; CHECK: movle r[[OLDX]], r0 +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i32: + %old = atomicrmw max i32* @var32, i32 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it gt +; CHECK: movgt r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_max_i64: + %old = atomicrmw max i64* @var64, i64 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: bge .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i8: + %old = atomicrmw umin i8* @var8, i8 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
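+ ; Unsigned min needs no sign extension: an unsigned compare plus the movlo
+ ; below picks the smaller value directly.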
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lo +; CHECK: movlo r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i16: + %old = atomicrmw umin i16* @var16, i16 %offset acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lo +; CHECK: movlo r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i32: + %old = atomicrmw umin i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it lo +; CHECK: movlo r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umin_i64: + %old = atomicrmw umin i64* @var64, i64 %offset acq_rel +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: blo .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i8: + %old = atomicrmw umax i8* @var8, i8 %offset acq_rel +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
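+ ; acq_rel combines the two one-sided orderings, ldaexb above plus stlexb
+ ; below; the movhi keeps the larger unsigned value.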
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it hi +; CHECK: movhi r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i16: + %old = atomicrmw umax i16* @var16, i16 %offset monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it hi +; CHECK: movhi r[[NEW]], r[[OLD]] +; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i32: + %old = atomicrmw umax i32* @var32, i32 %offset seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0 +; CHECK-NEXT: cmp r[[OLD]], r0 +; Thumb mode: it hi +; CHECK: movhi r[[NEW]], r[[OLD]] +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { +; CHECK-LABEL: test_atomic_load_umax_i64: + %old = atomicrmw umax i64* @var64, i64 %offset release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0 +; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1 +; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: +; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD1]] +; CHECK-NEXT: mov r1, r[[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i8: + %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLD]], r0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r1 is a reasonable guess. 
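+ ; On a failed comparison the loop exits without attempting a store, so the
+ ; strexb below only executes on the success path.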
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i16: + %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK: movt r[[ADDR]], :upper16:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLD]], r0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r1 is a reasonable guess. +; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i32: + %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32 +; CHECK: movt r[[ADDR]], :upper16:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]] + ; r0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp r[[OLD]], r0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r1 is a reasonable guess. +; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, r[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { +; CHECK-LABEL: test_atomic_cmpxchg_i64: + %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]] + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp [[OLD1]], r0 +; Thumb mode: it eq +; CHECK: cmpeq [[OLD2]], r1 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 +; CHECK-NEXT: BB#2: + ; As above, r2, r3 is a reasonable guess. 
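+ ; The 64-bit comparison checks both halves (cmp/cmpeq above); only when
+ ; both match does control reach the strexd below.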
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + +; CHECK: mov r0, [[OLD1]] +; CHECK-NEXT: mov r1, [[OLD2]] + ret i64 %old +} + +define i8 @test_atomic_load_monotonic_i8() nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_i8: + %val = load atomic i8* @var8 monotonic, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK: ldrb r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i8 %val +} + +define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + %val = load atomic i8* %addr monotonic, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldrb r0, [r0, r2] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i8 %val +} + +define i8 @test_atomic_load_acquire_i8() nounwind { +; CHECK-LABEL: test_atomic_load_acquire_i8: + %val = load atomic i8* @var8 acquire, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldab r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret i8 %val +} + +define i8 @test_atomic_load_seq_cst_i8() nounwind { +; CHECK-LABEL: test_atomic_load_seq_cst_i8: + %val = load atomic i8* @var8 seq_cst, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldab r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret i8 %val +} + +define i16 @test_atomic_load_monotonic_i16() nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_i16: + %val = load atomic i16* @var16 monotonic, align 2 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldrh r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i16 %val +} + +define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind { +; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + %val = load atomic i32* %addr monotonic, align 4 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldr r0, [r0, r2] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret i32 %val +} + +define i64 @test_atomic_load_seq_cst_i64() nounwind { +; CHECK-LABEL: test_atomic_load_seq_cst_i64: + %val = load atomic i64* @var64 seq_cst, align 8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var64 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldaexd r0, r1, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret i64 %val +} + +define void @test_atomic_store_monotonic_i8(i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_monotonic_i8: + store atomic i8 %val, i8* @var8 monotonic, align 1 +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK: strb r0, [r[[ADDR]]] + + ret void +} + +define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind { +; CHECK-LABEL: 
test_atomic_store_monotonic_regoff_i8: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + store atomic i8 %val, i8* %addr monotonic, align 1 +; CHECK: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp] +; CHECK: strb [[VAL]], [r0, r2] + + ret void +} + +define void @test_atomic_store_release_i8(i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_release_i8: + store atomic i8 %val, i8* @var8 release, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: stlb r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret void +} + +define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { +; CHECK-LABEL: test_atomic_store_seq_cst_i8: + store atomic i8 %val, i8* @var8 seq_cst, align 1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: stlb r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret void +} + +define void @test_atomic_store_monotonic_i16(i16 %val) nounwind { +; CHECK-LABEL: test_atomic_store_monotonic_i16: + store atomic i16 %val, i16* @var16 monotonic, align 2 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movt r[[ADDR]], :upper16:var16 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: strh r0, [r[[ADDR]]] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + ret void +} + +define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind { +; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + store atomic i32 %val, i32* %addr monotonic, align 4 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: ldr [[VAL:r[0-9]+]], [sp] +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: str [[VAL]], [r0, r2] +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret void +} + +define void @test_atomic_store_release_i64(i64 %val) nounwind { +; CHECK-LABEL: test_atomic_store_release_i64: + store atomic i64 %val, i64* @var64 release, align 8 +; CHECK-NOT: dmb +; CHECK-NOT: mcr +; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64 +; CHECK: movt r[[ADDR]], :upper16:var64 + +; CHECK: .LBB{{[0-9]+}}_1: + ; r0, r1 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]] +; CHECK-NEXT: cmp [[STATUS]], #0 +; CHECK-NEXT: bne .LBB{{[0-9]+}}_1 +; CHECK-NOT: dmb +; CHECK-NOT: mcr + + ret void +} + +define i32 @not.barriers(i32* %var, i1 %cond) { +; CHECK-LABEL: not.barriers: + br i1 %cond, label %atomic_ver, label %simple_ver +simple_ver: + %oldval = load i32* %var + %newval = add nsw i32 %oldval, -1 + store i32 %newval, i32* %var + br label %somewhere +atomic_ver: + fence seq_cst + %val = atomicrmw add i32* %var, i32 -1 monotonic + fence seq_cst + br label %somewhere +; CHECK: dmb +; CHECK: ldrex +; CHECK: dmb + ; The key point here is that the second dmb isn't immediately followed by the + ; simple_ver basic block, which LLVM attempted to do when DMB had been marked + ; with isBarrier. For now, look for something that looks like "somewhere". +; CHECK-NEXT: mov +somewhere: + %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver] + ret i32 %combined +} |
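Taken together, these tests pin down the AArch32 lowering this patch enables: acquire orderings map onto the load-acquire instructions (lda*, ldaex*), release orderings onto the store-release instructions (stl*, stlex*), seq_cst uses both, and monotonic uses neither, so no dmb barriers appear in any of the new sequences. As a rough illustration only (not part of the commit), the C11 sketch below shows source-level operations corresponding to some of the IR tests above; the variable and function names are invented for the example, and the expected instruction sequences assume a toolchain targeting ARMv8 AArch32 as in the RUN lines.

#include <stdatomic.h>
#include <stdint.h>

static atomic_uchar var8;  /* plays the role of @var8 above */

/* atomicrmw add i8 ... seq_cst: expected to lower to an ldaexb/stlexb
   loop with no dmb, as checked in test_atomic_load_add_i8. */
uint8_t fetch_add_seq_cst(uint8_t offset) {
  return atomic_fetch_add_explicit(&var8, offset, memory_order_seq_cst);
}

/* load atomic i8 ... acquire: expected to lower to a single ldab,
   as checked in test_atomic_load_acquire_i8. */
uint8_t load_acquire_i8(void) {
  return atomic_load_explicit(&var8, memory_order_acquire);
}

/* store atomic i8 ... release: expected to lower to a single stlb,
   as checked in test_atomic_store_release_i8. */
void store_release_i8(uint8_t v) {
  atomic_store_explicit(&var8, v, memory_order_release);
}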