author    Amara Emerson <amara.emerson@arm.com>    2013-09-26 12:22:36 +0000
committer Amara Emerson <amara.emerson@arm.com>    2013-09-26 12:22:36 +0000
commit    268c743a3ba44ada364938bc5ff9b1be219df54f (patch)
tree      b0799bffe0832dde28ca1d63bf033ec1c22101f2
parent    9637da60835a60f8ccd8289d04c60b2dcd4b9b5a (diff)
[ARM] Use the load-acquire/store-release instructions optimally in AArch32.
Patch by Artyom Skrobov.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191428 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  include/llvm/CodeGen/ISDOpcodes.h                |    6
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h               |    7
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h          |   21
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp         |   74
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp                |  156
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp                |  283
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h                  |   18
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                    |  332
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td                  |    9
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp   |    2
-rw-r--r--  test/CodeGen/ARM/atomic-64bit.ll                  |   16
-rw-r--r--  test/CodeGen/ARM/atomic-ops-v8.ll                 | 1344
12 files changed, 1896 insertions(+), 372 deletions(-)
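
In outline, the patch threads each atomic node's AtomicOrdering through instruction selection so that on ARMv8 AArch32 the exclusive-access loops can use the acquire/release instruction flavours (ldaex/stlex, and plain lda/stl for atomic loads and stores) instead of wrapping every operation in dmb barriers. A minimal standalone sketch of the ordering-to-mnemonic mapping that getExclusiveOperation (added in ARMISelLowering.cpp below) implements; the names and enum shape here are illustrative, not LLVM's:

#include <cstdio>

enum AtomicOrdering { Monotonic, Acquire, Release, AcquireRelease,
                      SequentiallyConsistent };

// Acquire-flavoured orderings want the load-acquire-exclusive form.
static const char *pickLoad(AtomicOrdering Ord) {
  return (Ord == Acquire || Ord == AcquireRelease ||
          Ord == SequentiallyConsistent) ? "ldaex" : "ldrex";
}

// Release-flavoured orderings want the store-release-exclusive form.
static const char *pickStore(AtomicOrdering Ord) {
  return (Ord == Release || Ord == AcquireRelease ||
          Ord == SequentiallyConsistent) ? "stlex" : "strex";
}

int main() {
  std::printf("acquire: %s/%s\n", pickLoad(Acquire), pickStore(Acquire)); // ldaex/strex
  std::printf("release: %s/%s\n", pickLoad(Release), pickStore(Release)); // ldrex/stlex
  std::printf("seq_cst: %s/%s\n", pickLoad(SequentiallyConsistent),
              pickStore(SequentiallyConsistent));                         // ldaex/stlex
}

These pairings are exactly what the new atomic-ops-v8.ll test checks for each ordering.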
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 45bb7e3..3a49dd8 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -604,11 +604,17 @@ namespace ISD {
ATOMIC_STORE,
/// Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap)
+ /// For double-word atomic operations:
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmpLo, cmpHi,
+ /// swapLo, swapHi)
/// This corresponds to the cmpxchg instruction.
ATOMIC_CMP_SWAP,
/// Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt)
/// Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt)
+ /// For double-word atomic operations:
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amtLo, amtHi)
+ /// ValLo, ValHi, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amtLo, amtHi)
/// These correspond to the atomicrmw instruction.
ATOMIC_SWAP,
ATOMIC_LOAD_ADD,
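
The comments added above document the operand convention for double-word atomics: i64 values travel through the DAG as lo/hi i32 pairs, and results come back as two i32 values plus the chain. This is ordinary 64-bit splitting; a hedged plain-C++ sketch of what the DAG expresses with EXTRACT_ELEMENT 0/1 and BUILD_PAIR (see ReplaceATOMIC_OP_64 in ARMISelLowering.cpp below):

#include <cstdint>
#include <cstdio>

// EXTRACT_ELEMENT ..., 0 and EXTRACT_ELEMENT ..., 1 take the halves:
void split(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(V);
  Hi = static_cast<uint32_t>(V >> 32);
}

// BUILD_PAIR reassembles the two i32 results into an i64:
uint64_t buildPair(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  uint32_t Lo, Hi;
  split(0x0123456789abcdefULL, Lo, Hi);
  std::printf("lo=%#x hi=%#x whole=%#llx\n", Lo, Hi,
              static_cast<unsigned long long>(buildPair(Lo, Hi)));
}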
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 79e533e..70920d1 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -677,6 +677,13 @@ public:
AtomicOrdering Ordering,
SynchronizationScope SynchScope);
+ /// getAtomic - Gets a node for an atomic op, produces result and chain and
+ /// takes N operands.
+ SDValue getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTList,
+ SDValue* Ops, unsigned NumOps, MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope);
+
/// getMemIntrinsicNode - Creates a MemIntrinsicNode that may produce a
/// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
/// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index dc9bfbc..4166340 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1073,6 +1073,7 @@ public:
///
class AtomicSDNode : public MemSDNode {
SDUse Ops[4];
+ SDUse* DynOps;
void InitAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope) {
// This must match encodeMemSDNodeFlags() in SelectionDAG.cpp.
@@ -1100,7 +1101,7 @@ public:
SDValue Chain, SDValue Ptr,
SDValue Cmp, SDValue Swp, MachineMemOperand *MMO,
AtomicOrdering Ordering, SynchronizationScope SynchScope)
- : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+ : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO), DynOps(NULL) {
InitAtomic(Ordering, SynchScope);
InitOperands(Ops, Chain, Ptr, Cmp, Swp);
}
@@ -1109,7 +1110,7 @@ public:
SDValue Chain, SDValue Ptr,
SDValue Val, MachineMemOperand *MMO,
AtomicOrdering Ordering, SynchronizationScope SynchScope)
- : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+ : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO), DynOps(NULL) {
InitAtomic(Ordering, SynchScope);
InitOperands(Ops, Chain, Ptr, Val);
}
@@ -1118,10 +1119,22 @@ public:
SDValue Chain, SDValue Ptr,
MachineMemOperand *MMO,
AtomicOrdering Ordering, SynchronizationScope SynchScope)
- : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+ : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO), DynOps(NULL) {
InitAtomic(Ordering, SynchScope);
InitOperands(Ops, Chain, Ptr);
}
+ AtomicSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTL, EVT MemVT,
+ SDValue* AllOps, unsigned NumOps,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering, SynchronizationScope SynchScope)
+ : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
+ DynOps = new SDUse[NumOps];
+ InitAtomic(Ordering, SynchScope);
+ InitOperands(DynOps, AllOps, NumOps);
+ }
+ ~AtomicSDNode() {
+ delete[] DynOps;
+ }
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getVal() const { return getOperand(2); }
@@ -1852,7 +1865,7 @@ template <> struct GraphTraits<SDNode*> {
/// LargestSDNode - The largest SDNode class.
///
-typedef LoadSDNode LargestSDNode;
+typedef AtomicSDNode LargestSDNode;
/// MostAlignedSDNode - The SDNode class with the greatest alignment
/// requirement.
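
A note on the AtomicSDNode change above: the new variadic constructor copies its operands into a heap-allocated SDUse array (DynOps) because they no longer fit the inline Ops[4], and the added destructor frees it; the fixed-arity constructors initialize DynOps to null so the delete[] is a no-op. A small sketch of that owning-storage pattern, with an illustrative Use type standing in for SDUse:

#include <algorithm>

struct Use { int Val; };

struct NodeFixed4 {
  Use Inline[4];  // the pre-existing fixed storage (Ops[4])
  Use *DynOps;    // owned heap storage for larger operand lists

  NodeFixed4() : DynOps(nullptr) {}         // fixed-arity constructors
  NodeFixed4(const Use *Ops, unsigned N) {  // variadic constructor
    DynOps = new Use[N];
    std::copy(Ops, Ops + N, DynOps);
  }
  ~NodeFixed4() { delete[] DynOps; }  // no-op for the fixed-arity case
};

int main() {
  Use Ops[6] = {{0}, {1}, {2}, {3}, {4}, {5}};
  NodeFixed4 Big(Ops, 6);  // operands live in DynOps
  NodeFixed4 Small;        // operands would live in Inline; DynOps is null
  (void)Big; (void)Small;
}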
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 3ca1b4f..25f6aac 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4107,6 +4107,29 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
+ SDVTList VTList, SDValue* Ops, unsigned NumOps,
+ MachineMemOperand *MMO,
+ AtomicOrdering Ordering,
+ SynchronizationScope SynchScope) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(MemVT.getRawBits());
+ AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void* IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
+ dl.getDebugLoc(), VTList, MemVT,
+ Ops, NumOps, MMO, Ordering,
+ SynchScope);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Cmp,
SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment,
@@ -4146,23 +4169,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
EVT VT = Cmp.getValueType();
SDVTList VTs = getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 4);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
- dl.getDebugLoc(), VTs, MemVT,
- Chain, Ptr, Cmp, Swp, MMO,
- Ordering, SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4220,23 +4228,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr, Val};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
- dl.getDebugLoc(), VTs, MemVT,
- Chain, Ptr, Val, MMO,
- Ordering, SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4279,23 +4272,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
SDVTList VTs = getVTList(VT, MVT::Other);
- FoldingSetNodeID ID;
- ID.AddInteger(MemVT.getRawBits());
SDValue Ops[] = {Chain, Ptr};
- AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
- ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
- cast<AtomicSDNode>(E)->refineAlignment(MMO);
- return SDValue(E, 0);
- }
- SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
- dl.getDebugLoc(), VTs, MemVT,
- Chain, Ptr, MMO, Ordering,
- SynchScope);
- CSEMap.InsertNode(N, IP);
- AllNodes.push_back(N);
- return SDValue(N, 0);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope);
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
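
The SelectionDAG.cpp change above is a pure refactor: the three fixed-arity getAtomic overloads previously repeated the same FoldingSet sequence, which the new N-operand overload now centralizes — hash the node's identity (memory VT, opcode, value types, operands, address space), return any existing equivalent node, otherwise allocate and register a fresh one. The same lookup-or-create shape, sketched with std::unordered_map standing in for LLVM's CSEMap:

#include <string>
#include <unordered_map>

struct Node { std::string ID; };

std::unordered_map<std::string, Node *> CSEMap;

Node *getOrCreate(const std::string &ID) {
  auto It = CSEMap.find(ID);
  if (It != CSEMap.end())
    return It->second;     // CSE hit: reuse the equivalent node
  Node *N = new Node{ID};  // miss: build and register a new one
  CSEMap.emplace(ID, N);
  return N;
}

int main() {
  Node *A = getOrCreate("ATOMIC_LOAD_ADD:i32");
  Node *B = getOrCreate("ATOMIC_LOAD_ADD:i32");
  return A == B ? 0 : 1;   // the second lookup returns the same node
}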
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index f6b3827..87d1522 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -253,7 +253,7 @@ private:
SDNode *SelectConcatVector(SDNode *N);
- SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -2361,23 +2361,36 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
-SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+ unsigned Op16, unsigned Op32,
+ unsigned Op64) {
+ // Mostly direct translation to the given operations, except that we preserve
+ // the AtomicOrdering for use later on.
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ EVT VT = AN->getMemoryVT();
+
+ unsigned Op;
+ SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
+ if (VT == MVT::i8)
+ Op = Op8;
+ else if (VT == MVT::i16)
+ Op = Op16;
+ else if (VT == MVT::i32)
+ Op = Op32;
+ else if (VT == MVT::i64) {
+ Op = Op64;
+ VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
+ } else
+ llvm_unreachable("Unexpected atomic operation");
+
SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(1)); // Ptr
- Ops.push_back(Node->getOperand(2)); // Low part of Val1
- Ops.push_back(Node->getOperand(3)); // High part of Val1
- if (Opc == ARM::ATOMCMPXCHG6432) {
- Ops.push_back(Node->getOperand(4)); // Low part of Val2
- Ops.push_back(Node->getOperand(5)); // High part of Val2
- }
- Ops.push_back(Node->getOperand(0)); // Chain
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
- MVT::i32, MVT::i32, MVT::Other,
- Ops);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
- return ResNode;
+ for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+ Ops.push_back(AN->getOperand(i));
+
+ Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+ Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+ return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
}
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
@@ -3251,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
- case ARMISD::ATOMOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMOR6432);
- case ARMISD::ATOMXOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMXOR6432);
- case ARMISD::ATOMADD64_DAG:
- return SelectAtomic64(N, ARM::ATOMADD6432);
- case ARMISD::ATOMSUB64_DAG:
- return SelectAtomic64(N, ARM::ATOMSUB6432);
- case ARMISD::ATOMNAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMNAND6432);
- case ARMISD::ATOMAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMAND6432);
- case ARMISD::ATOMSWAP64_DAG:
- return SelectAtomic64(N, ARM::ATOMSWAP6432);
- case ARMISD::ATOMCMPXCHG64_DAG:
- return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
-
- case ARMISD::ATOMMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMMIN6432);
- case ARMISD::ATOMUMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMIN6432);
- case ARMISD::ATOMMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMMAX6432);
- case ARMISD::ATOMUMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMAX6432);
+ case ISD::ATOMIC_LOAD:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_STORE:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_ADD_I8,
+ ARM::ATOMIC_LOAD_ADD_I16,
+ ARM::ATOMIC_LOAD_ADD_I32,
+ ARM::ATOMIC_LOAD_ADD_I64);
+ case ISD::ATOMIC_LOAD_SUB:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_SUB_I8,
+ ARM::ATOMIC_LOAD_SUB_I16,
+ ARM::ATOMIC_LOAD_SUB_I32,
+ ARM::ATOMIC_LOAD_SUB_I64);
+ case ISD::ATOMIC_LOAD_AND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_AND_I8,
+ ARM::ATOMIC_LOAD_AND_I16,
+ ARM::ATOMIC_LOAD_AND_I32,
+ ARM::ATOMIC_LOAD_AND_I64);
+ case ISD::ATOMIC_LOAD_OR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_OR_I8,
+ ARM::ATOMIC_LOAD_OR_I16,
+ ARM::ATOMIC_LOAD_OR_I32,
+ ARM::ATOMIC_LOAD_OR_I64);
+ case ISD::ATOMIC_LOAD_XOR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_XOR_I8,
+ ARM::ATOMIC_LOAD_XOR_I16,
+ ARM::ATOMIC_LOAD_XOR_I32,
+ ARM::ATOMIC_LOAD_XOR_I64);
+ case ISD::ATOMIC_LOAD_NAND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_NAND_I8,
+ ARM::ATOMIC_LOAD_NAND_I16,
+ ARM::ATOMIC_LOAD_NAND_I32,
+ ARM::ATOMIC_LOAD_NAND_I64);
+ case ISD::ATOMIC_LOAD_MIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MIN_I8,
+ ARM::ATOMIC_LOAD_MIN_I16,
+ ARM::ATOMIC_LOAD_MIN_I32,
+ ARM::ATOMIC_LOAD_MIN_I64);
+ case ISD::ATOMIC_LOAD_MAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MAX_I8,
+ ARM::ATOMIC_LOAD_MAX_I16,
+ ARM::ATOMIC_LOAD_MAX_I32,
+ ARM::ATOMIC_LOAD_MAX_I64);
+ case ISD::ATOMIC_LOAD_UMIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMIN_I8,
+ ARM::ATOMIC_LOAD_UMIN_I16,
+ ARM::ATOMIC_LOAD_UMIN_I32,
+ ARM::ATOMIC_LOAD_UMIN_I64);
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMAX_I8,
+ ARM::ATOMIC_LOAD_UMAX_I16,
+ ARM::ATOMIC_LOAD_UMAX_I32,
+ ARM::ATOMIC_LOAD_UMAX_I64);
+ case ISD::ATOMIC_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_SWAP_I8,
+ ARM::ATOMIC_SWAP_I16,
+ ARM::ATOMIC_SWAP_I32,
+ ARM::ATOMIC_SWAP_I64);
+ case ISD::ATOMIC_CMP_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_CMP_SWAP_I8,
+ ARM::ATOMIC_CMP_SWAP_I16,
+ ARM::ATOMIC_CMP_SWAP_I32,
+ ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);
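
The one non-mechanical step in SelectAtomic above is appending the node's AtomicOrdering as an i32 target constant, so the ordering survives selection and can be read back with getImm() in the custom inserters (next file). A toy round-trip of that encoding, with a made-up PseudoMI type standing in for MachineInstr:

#include <cassert>
#include <vector>

enum AtomicOrdering { Monotonic, Acquire, Release, SequentiallyConsistent };

struct PseudoMI { std::vector<int> Operands; };

PseudoMI select(int Ptr, int Incr, AtomicOrdering Ord) {
  // Regular operands first, then the ordering encoded as an immediate.
  return PseudoMI{{Ptr, Incr, static_cast<int>(Ord)}};
}

AtomicOrdering recoverOrdering(const PseudoMI &MI) {
  return static_cast<AtomicOrdering>(MI.Operands.back());  // getImm()
}

int main() {
  PseudoMI MI = select(/*Ptr*/ 0, /*Incr*/ 1, Acquire);
  assert(recoverOrdering(MI) == Acquire);
}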
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 773b710..96942ec 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -769,8 +769,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ // On v8, we have particularly efficient implementations of atomic fences
+ // if they can be combined with nearby atomic loads and stores.
+ if (!Subtarget->hasV8Ops()) {
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ }
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
+ //setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom);
} else {
// Set them all for expansion, which will force libcalls.
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
@@ -909,6 +915,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+ bool isThumb2, unsigned &LdrOpc,
+ unsigned &StrOpc) {
+ static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
+ {ARM::LDREXH, ARM::t2LDREXH},
+ {ARM::LDREX, ARM::t2LDREX},
+ {ARM::LDREXD, ARM::t2LDREXD}};
+ static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
+ {ARM::LDAEXH, ARM::t2LDAEXH},
+ {ARM::LDAEX, ARM::t2LDAEX},
+ {ARM::LDAEXD, ARM::t2LDAEXD}};
+ static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
+ {ARM::STREXH, ARM::t2STREXH},
+ {ARM::STREX, ARM::t2STREX},
+ {ARM::STREXD, ARM::t2STREXD}};
+ static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
+ {ARM::STLEXH, ARM::t2STLEXH},
+ {ARM::STLEX, ARM::t2STLEX},
+ {ARM::STLEXD, ARM::t2STLEXD}};
+
+ const unsigned (*LoadOps)[2], (*StoreOps)[2];
+ if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ LoadOps = LoadAcqs;
+ else
+ LoadOps = LoadBares;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ StoreOps = StoreRels;
+ else
+ StoreOps = StoreBares;
+
+ assert(isPowerOf2_32(Size) && Size <= 8 &&
+ "unsupported size for atomic binary op!");
+
+ LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
+ StrOpc = StoreOps[Log2_32(Size)][isThumb2];
+}
+
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
@@ -1094,19 +1138,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
-
- case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG";
- case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG";
- case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG";
- case ARMISD::ATOMXOR64_DAG: return "ATOMXOR64_DAG";
- case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG";
- case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG";
- case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG";
- case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG";
- case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG";
- case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG";
- case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG";
- case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG";
}
}
@@ -5922,32 +5953,28 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG, unsigned NewOp) {
+ SelectionDAG &DAG) {
SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
SmallVector<SDValue, 6> Ops;
Ops.push_back(Node->getOperand(0)); // Chain
Ops.push_back(Node->getOperand(1)); // Ptr
- // Low part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0)));
- // High part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1)));
- if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
- // High part of Val1
+ for (unsigned i = 2; i < Node->getNumOperands(); ++i) {
+ // Low part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(0)));
- // High part of Val2
+ Node->getOperand(i), DAG.getIntPtrConstant(0)));
+ // High part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ Node->getOperand(i), DAG.getIntPtrConstant(1)));
}
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
+ DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
+ cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
+ AN->getSynchScope());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
@@ -6073,41 +6100,21 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
- return;
case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
- return;
case ISD::ATOMIC_CMP_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
- return;
case ISD::ATOMIC_LOAD_MIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_MAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ ReplaceATOMIC_OP_64(N, Results, DAG);
return;
}
if (Res.getNode())
@@ -6127,6 +6134,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6142,21 +6150,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6236,6 +6230,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6243,24 +6238,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -6344,6 +6326,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
unsigned oldval = dest;
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6351,24 +6334,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
extendOpc = 0;
break;
}
@@ -6412,7 +6391,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRnopcRegClass);
+ if (!isThumb2)
+ MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
@@ -6450,7 +6432,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
bool NeedsCarry, bool IsCmpxchg,
bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6458,11 +6440,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction::iterator It = BB;
++It;
+ bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64);
+ unsigned offset = (isStore ? -2 : 0);
unsigned destlo = MI->getOperand(0).getReg();
unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- unsigned vallo = MI->getOperand(3).getReg();
- unsigned valhi = MI->getOperand(4).getReg();
+ unsigned ptr = MI->getOperand(offset+2).getReg();
+ unsigned vallo = MI->getOperand(offset+3).getReg();
+ unsigned valhi = MI->getOperand(offset+4).getReg();
+ unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5);
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6475,6 +6461,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
}
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
@@ -6514,21 +6503,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// fallthrough --> exitMBB
BB = loopMBB;
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
+ if (!isStore) {
+ // Load
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
}
unsigned StoreLo, StoreHi;
@@ -6582,7 +6573,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
if (isThumb2) {
MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StoreLo).addReg(StoreHi).addReg(ptr));
} else {
// Marshal a pair...
@@ -6600,7 +6591,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
.addImm(ARM::gsub_1);
// ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StorePair).addReg(ptr));
}
// Cmp+jump
@@ -6621,6 +6612,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
+
+ if (isThumb2) {
+ MIB.addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr);
+
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
+
+ // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ AddDefaultPred(MIB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -7594,46 +7630,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+ case ARM::ATOMIC_LOAD_I64:
+ return EmitAtomicLoad64(MI, BB);
- case ARM::ATOMADD6432:
+ case ARM::ATOMIC_LOAD_ADD_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMSUB6432:
+ case ARM::ATOMIC_LOAD_SUB_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMOR6432:
+ case ARM::ATOMIC_LOAD_OR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMXOR6432:
+ case ARM::ATOMIC_LOAD_XOR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMAND6432:
+ case ARM::ATOMIC_LOAD_AND_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMSWAP6432:
+ case ARM::ATOMIC_STORE_I64:
+ case ARM::ATOMIC_SWAP_I64:
return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMCMPXCHG6432:
+ case ARM::ATOMIC_CMP_SWAP_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMMIN6432:
+ case ARM::ATOMIC_LOAD_MIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMMAX6432:
+ case ARM::ATOMIC_LOAD_MAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMUMIN6432:
+ case ARM::ATOMIC_LOAD_UMIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMUMAX6432:
+ case ARM::ATOMIC_LOAD_UMAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::HS);
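
All of the renamed pseudos above expand, via the EmitAtomic* custom inserters, into a load-exclusive / modify / store-exclusive retry loop; picking the ldaex/stlex variants for acquire/release orderings is what lets the v8 path drop its dmb barriers. The loop's shape, sketched portably with std::atomic's compare_exchange_weak as a stand-in for the ldrex/strex pair (the real expansion builds the MachineInstrs directly):

#include <atomic>
#include <cstdio>

unsigned fetchAddSeqCst(std::atomic<unsigned> &V, unsigned Incr) {
  unsigned Old = V.load(std::memory_order_relaxed);
  // The ldrex/strex loop: retry until the exclusive store succeeds.
  while (!V.compare_exchange_weak(Old, Old + Incr,
                                  std::memory_order_seq_cst,
                                  std::memory_order_relaxed)) {
    // compare_exchange_weak refreshes Old on failure.
  }
  return Old;
}

int main() {
  std::atomic<unsigned> V{41};
  std::printf("%u -> %u\n", fetchAddSeqCst(V, 1), V.load());
}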
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index fca9e0e..6131a26 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -223,21 +223,7 @@ namespace llvm {
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD,
-
- // 64-bit atomic ops (value split into two registers)
- ATOMADD64_DAG,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMSWAP64_DAG,
- ATOMCMPXCHG64_DAG,
- ATOMMIN64_DAG,
- ATOMUMIN64_DAG,
- ATOMMAX64_DAG,
- ATOMUMAX64_DAG
+ VST4LN_UPD
};
}
@@ -574,6 +560,8 @@ namespace llvm {
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
+ MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 80226ac..59d2f7a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1677,48 +1677,6 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These pseudos use a hand-written selection code).
-let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
-def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
- GPR:$set1, GPR:$set2),
- NoItinerary, []>;
-def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-}
-
def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
"hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
bits<3> imm;
@@ -4329,124 +4287,219 @@ def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
+let usesCustomInserter = 1, Defs = [CPSR] in {
+
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in
-def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
+ def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-let usesCustomInserter = 1 in {
- let Defs = [CPSR] in {
+// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
+// (64-bit pseudos use a hand-written selection code).
+ let mayLoad = 1, mayStore = 1 in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
-
- def ATOMIC_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
- def ATOMIC_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
-
- def ATOMIC_CMP_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_CMP_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
-}
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_ADD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_SUB_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_AND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_OR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_XOR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_NAND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2, i32imm:$ordering),
+ NoItinerary, []>;
+ }
+ let mayLoad = 1 in
+ def ATOMIC_LOAD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, i32imm:$ordering),
+ NoItinerary, []>;
+ let mayStore = 1 in
+ def ATOMIC_STORE_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
}
let usesCustomInserter = 1 in {
@@ -4560,6 +4613,35 @@ def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
(STREXH GPR:$Rt, addr_offset_none:$addr)>;
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Acquire || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Release || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+
+let AddedComplexity = 8 in {
+ def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
+}
+
// SWP/SWPB are deprecated in V6/V7.
let mayLoad = 1, mayStore = 1 in {
def SWP : AIswp<0, (outs GPRnopc:$Rt),
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index fcc8f86..07baf2d 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4216,6 +4216,15 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+let AddedComplexity = 8 in {
+ def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+}
+
//===----------------------------------------------------------------------===//
// Assembler aliases
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 64001b4..94069cd 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -94,7 +94,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
unsigned SubVer = TT[Idx];
if (SubVer == '8') {
// FIXME: Parse v8 features
- ARMArchFeature = "+v8";
+ ARMArchFeature = "+v8,+db";
} else if (SubVer == '7') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
isThumb = true;
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 06a4df9..0477d4f 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -175,28 +175,14 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
ret i64 %r
}
-; Compiles down to cmpxchg
-; FIXME: Should compile to a single ldrexd
+; Compiles down to a single ldrexd
define i64 @test8(i64* %ptr) {
; CHECK-LABEL: test8:
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK: cmp [[REG1]]
-; CHECK: cmpeq [[REG2]]
-; CHECK: bne
-; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
-; CHECK: cmp
-; CHECK: bne
; CHECK: dmb {{ish$}}
; CHECK-THUMB-LABEL: test8:
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
-; CHECK-THUMB: cmp [[REG1]]
-; CHECK-THUMB: it eq
-; CHECK-THUMB: cmpeq [[REG2]]
-; CHECK-THUMB: bne
-; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
-; CHECK-THUMB: cmp
-; CHECK-THUMB: bne
; CHECK-THUMB: dmb {{ish$}}
%r = load atomic i64* %ptr seq_cst, align 8
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
new file mode 100644
index 0000000..3f93929
--- /dev/null
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -0,0 +1,1344 @@
+; RUN: llc -mtriple=armv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i8:
+ %old = atomicrmw add i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i16:
+ %old = atomicrmw add i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i32:
+ %old = atomicrmw add i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: add{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_add_i64:
+ %old = atomicrmw add i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: adds [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: adc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
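+
+; For the i64 widths there is no single-register exclusive, so the tests expect
+; the paired forms (ldrexd/ldaexd and strexd/stlexd) on a register pair, with
+; the arithmetic split into a low half that sets carry (adds/subs) and a high
+; half that consumes it (adc/sbc).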
+
+define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i8:
+ %old = atomicrmw sub i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i16:
+ %old = atomicrmw sub i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i32:
+ %old = atomicrmw sub i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: sub{{s?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_sub_i64:
+ %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: subs [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbc{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i8:
+ %old = atomicrmw and i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i16:
+ %old = atomicrmw and i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i32:
+ %old = atomicrmw and i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_and_i64:
+ %old = atomicrmw and i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: and{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: and{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i8:
+ %old = atomicrmw or i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i16:
+ %old = atomicrmw or i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i32:
+ %old = atomicrmw or i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_or_i64:
+ %old = atomicrmw or i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: orr{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i8:
+ %old = atomicrmw xor i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i16:
+ %old = atomicrmw xor i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i32:
+ %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW:r[0-9]+]], r[[OLD]], r0
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], [[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xor_i64:
+ %old = atomicrmw xor i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW1:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: eor{{(\.w)?}} [[NEW2:r[0-9]+]], r[[OLD2]], r1
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], [[NEW1]], [[NEW2]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i8:
+ %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i16:
+ %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i32:
+ %old = atomicrmw xchg i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r0, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_xchg_i64:
+ %old = atomicrmw xchg i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i8:
+ %old = atomicrmw min i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ge
+; CHECK: movge r[[OLDX]], r0
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
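+
+; In the sub-word signed min/max tests the loaded value is sign-extended
+; (sxtb/sxth) before the cmp so the comparison is done at full width; the
+; conditional move (movge here, under an it block in Thumb mode) then picks
+; between the old value and the incoming offset.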
+
+define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i16:
+ %old = atomicrmw min i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it ge
+; CHECK: movge r[[OLDX]], r0
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i32:
+ %old = atomicrmw min i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it lt
+; CHECK: movlt r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_min_i64:
+ %old = atomicrmw min i64* @var64, i64 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: blt .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
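+
+; The i64 min/max/umin/umax tests compare the pair with a subs/sbcs sequence,
+; whose flags give the full 64-bit comparison, and branch past the
+; store-exclusive when the old value is already the one to keep, so nothing
+; needs to be written back in that case.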
+
+define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i8:
+ %old = atomicrmw max i8* @var8, i8 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxtb r[[OLDX:[0-9]+]], r[[OLD]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it le
+; CHECK: movle r[[OLDX]], r0
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i16:
+ %old = atomicrmw max i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+; CHECK-NEXT: sxth r[[OLDX:[0-9]+]], r[[OLD]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp r[[OLDX]], r0
+; Thumb mode: it le
+; CHECK: movle r[[OLDX]], r0
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[OLDX]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i32:
+ %old = atomicrmw max i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it gt
+; CHECK: movgt r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_max_i64:
+ %old = atomicrmw max i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: bge .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i8:
+ %old = atomicrmw umin i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it lo
+; CHECK: movlo r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i16:
+ %old = atomicrmw umin i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it lo
+; CHECK: movlo r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i32:
+ %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it lo
+; CHECK: movlo r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umin_i64:
+ %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: blo .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i8:
+ %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it hi
+; CHECK: movhi r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlexb [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
+
+define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i16:
+ %old = atomicrmw umax i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it hi
+; CHECK: movhi r[[NEW]], r[[OLD]]
+; CHECK-NEXT: strexh [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i32:
+ %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: mov r[[NEW:[0-9]+]], r0
+; CHECK-NEXT: cmp r[[OLD]], r0
+; Thumb mode: it hi
+; CHECK: movhi r[[NEW]], r[[OLD]]
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r[[NEW]], [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
+; CHECK-LABEL: test_atomic_load_umax_i64:
+ %old = atomicrmw umax i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd r[[OLD1:[0-9]+]], r[[OLD2:[0-9]+]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: subs [[NEW:r[0-9]+]], r[[OLD1]], r0
+; CHECK-NEXT: sbcs{{(\.w)?}} [[NEW]], r[[OLD2]], r1
+; CHECK-NEXT: bhs .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD1]]
+; CHECK-NEXT: mov r1, r[[OLD2]]
+ ret i64 %old
+}
+
+define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i8:
+ %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+ ; As above, r1 is a reasonable guess.
+; CHECK-NEXT: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i8 %old
+}
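+
+; The cmpxchg tests add an early exit: when the loaded value does not match
+; the expected one, control branches straight past the store-exclusive (to
+; .LBB..._3), so the new value is only written after a successful comparison.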
+
+define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i16:
+ %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK: movt r[[ADDR]], :upper16:var16
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+ ; As above, r1 is a reasonable guess.
+; CHECK-NEXT: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i16 %old
+}
+
+define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i32:
+ %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var32
+; CHECK: movt r[[ADDR]], :upper16:var32
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrex r[[OLD:[0-9]+]], [r[[ADDR]]]
+ ; r0 below is a reasonable guess but could change: it certainly comes into the
+ ; function there.
+; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+ ; As above, r1 is a reasonable guess.
+; CHECK-NEXT: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, r[[OLD]]
+ ret i32 %old
+}
+
+define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i64:
+ %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+; CHECK-NEXT: ldrexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK-NEXT: cmp [[OLD1]], r0
+; Thumb mode: it eq
+; CHECK: cmpeq [[OLD2]], r1
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
+; CHECK-NEXT: BB#2:
+ ; As above, r2, r3 are reasonable guesses.
+; CHECK-NEXT: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+; CHECK: mov r0, [[OLD1]]
+; CHECK-NEXT: mov r1, [[OLD2]]
+ ret i64 %old
+}
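+
+; The i64 cmpxchg compares the loaded pair against r0/r1 with a cmp followed
+; by a conditional cmpeq (under it eq in Thumb mode), so a mismatch in either
+; half takes the failure path.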
+
+define i8 @test_atomic_load_monotonic_i8() nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_i8:
+ %val = load atomic i8* @var8 monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: ldrb r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_regoff_i8:
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i8*
+
+ %val = load atomic i8* %addr monotonic, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldrb r0, [r0, r2]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+ ret i8 %val
+}
+
+define i8 @test_atomic_load_acquire_i8() nounwind {
+; CHECK-LABEL: test_atomic_load_acquire_i8:
+ %val = load atomic i8* @var8 acquire, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldab r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+ ret i8 %val
+}
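+
+; Plain atomic loads at acquire or seq_cst need no loop: a single load-acquire
+; (ldab in this i8 case) is expected, again with no dmb or mcr barrier on
+; either side.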
+
+define i8 @test_atomic_load_seq_cst_i8() nounwind {
+; CHECK-LABEL: test_atomic_load_seq_cst_i8:
+ %val = load atomic i8* @var8 seq_cst, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldab r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+ ret i8 %val
+}
+
+define i16 @test_atomic_load_monotonic_i16() nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_i16:
+ %val = load atomic i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldrh r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+ ret i16 %val
+}
+
+define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind {
+; CHECK-LABEL: test_atomic_load_monotonic_regoff_i32:
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i32*
+
+ %val = load atomic i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldr r0, [r0, r2]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+ ret i32 %val
+}
+
+define i64 @test_atomic_load_seq_cst_i64() nounwind {
+; CHECK-LABEL: test_atomic_load_seq_cst_i64:
+ %val = load atomic i64* @var64 seq_cst, align 8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var64
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldaexd r0, r1, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+ ret i64 %val
+}
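+
+; The i64 load is the exception: AArch32 has no 64-bit lda, so even a plain
+; seq_cst load is expected to go through ldaexd to get a single-copy-atomic
+; 64-bit read.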
+
+define void @test_atomic_store_monotonic_i8(i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_i8:
+ store atomic i8 %val, i8* @var8 monotonic, align 1
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK: strb r0, [r[[ADDR]]]
+
+ ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_regoff_i8:
+
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i8*
+
+ store atomic i8 %val, i8* %addr monotonic, align 1
+; CHECK: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp]
+; CHECK: strb [[VAL]], [r0, r2]
+
+ ret void
+}
+
+define void @test_atomic_store_release_i8(i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_release_i8:
+ store atomic i8 %val, i8* @var8 release, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: stlb r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+ ret void
+}
+
+define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_seq_cst_i8:
+ store atomic i8 %val, i8* @var8 seq_cst, align 1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: stlb r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+ ret void
+}
+
+define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_i16:
+ store atomic i16 %val, i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: strh r0, [r[[ADDR]]]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+ ret void
+}
+
+define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_monotonic_regoff_i32:
+
+ %addr_int = add i64 %base, %off
+ %addr = inttoptr i64 %addr_int to i32*
+
+ store atomic i32 %val, i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: ldr [[VAL:r[0-9]+]], [sp]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: str [[VAL]], [r0, r2]
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+ ret void
+}
+
+define void @test_atomic_store_release_i64(i64 %val) nounwind {
+; CHECK-LABEL: test_atomic_store_release_i64:
+ store atomic i64 %val, i64* @var64 release, align 8
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
+; CHECK: movt r[[ADDR]], :upper16:var64
+
+; CHECK: .LBB{{[0-9]+}}_1:
+ ; r0, r1 below are reasonable guesses but could change: the argument certainly
+ ; comes into the function there.
+; CHECK: stlexd [[STATUS:r[0-9]+]], r0, r1, [r[[ADDR]]]
+; CHECK-NEXT: cmp [[STATUS]], #0
+; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NOT: dmb
+; CHECK-NOT: mcr
+
+ ret void
+}
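+
+; Likewise an i64 store-release cannot be a single stl, so this test expects a
+; stlexd loop that retries until the exclusive store succeeds.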
+
+define i32 @not.barriers(i32* %var, i1 %cond) {
+; CHECK-LABEL: not.barriers:
+ br i1 %cond, label %atomic_ver, label %simple_ver
+simple_ver:
+ %oldval = load i32* %var
+ %newval = add nsw i32 %oldval, -1
+ store i32 %newval, i32* %var
+ br label %somewhere
+atomic_ver:
+ fence seq_cst
+ %val = atomicrmw add i32* %var, i32 -1 monotonic
+ fence seq_cst
+ br label %somewhere
+; CHECK: dmb
+; CHECK: ldrex
+; CHECK: dmb
+ ; The key point here is that the second dmb isn't immediately followed by the
+ ; simple_ver basic block, which is the layout LLVM used to produce when DMB was
+ ; marked with isBarrier. For now, look for something that looks like "somewhere".
+; CHECK-NEXT: mov
+somewhere:
+ %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver]
+ ret i32 %combined
+}