diff options
author | Tim Northover <tnorthover@apple.com> | 2013-07-01 14:48:48 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2013-07-01 14:48:48 +0000 |
commit | d59fc0af0a3ebd13c7004511e64e3233dfe87b17 (patch) | |
tree | 1cc4dbfa52c9735cb04654858ebc786c87a15509 | |
parent | 728af3d574895dd9e4bb5c418c7398297c4f39fe (diff) | |
download | external_llvm-d59fc0af0a3ebd13c7004511e64e3233dfe87b17.zip external_llvm-d59fc0af0a3ebd13c7004511e64e3233dfe87b17.tar.gz external_llvm-d59fc0af0a3ebd13c7004511e64e3233dfe87b17.tar.bz2 |
ARM: relax the atomic release barrier to "dmb ishst"
I believe the full "dmb ish" barrier is not required to guarantee release
semantics for atomic operations. The weaker "dmb ishst" prevents previous
operations from being reordered with a store executed afterwards, which is enough.
A key point to note (fortunately already correct) is that this barrier alone is
*insufficient* for sequential consistency, no matter how liberally placed.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185339 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 6 | ||||
-rw-r--r-- | test/CodeGen/ARM/atomic-64bit.ll | 100 | ||||
-rw-r--r-- | test/CodeGen/ARM/atomic-load-store.ll | 46 |
3 files changed, 95 insertions, 57 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ff8571b..370962d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2557,8 +2557,12 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } + ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); + AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); + unsigned Domain = Ord == Release ? ARM_MB::ISHST : ARM_MB::ISH; + return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(ARM_MB::ISH, MVT::i32)); + DAG.getConstant(Domain, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll index f2c7305..878119b 100644 --- a/test/CodeGen/ARM/atomic-64bit.ll +++ b/test/CodeGen/ARM/atomic-64bit.ll @@ -3,24 +3,24 @@ define i64 @test1(i64* %ptr, i64 %val) { ; CHECK: test1: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: adds [[REG3:(r[0-9]?[02468])]], [[REG1]] ; CHECK: adc [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test1: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw add i64* %ptr, i64 %val seq_cst ret i64 %r @@ -28,24 +28,24 @@ define i64 @test1(i64* %ptr, i64 %val) { define i64 @test2(i64* %ptr, i64 %val) { ; CHECK: test2: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: 
subs [[REG3:(r[0-9]?[02468])]], [[REG1]] ; CHECK: sbc [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test2: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw sub i64* %ptr, i64 %val seq_cst ret i64 %r @@ -53,24 +53,24 @@ define i64 @test2(i64* %ptr, i64 %val) { define i64 @test3(i64* %ptr, i64 %val) { ; CHECK: test3: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: and [[REG3:(r[0-9]?[02468])]], [[REG1]] ; CHECK: and [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test3: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw and i64* %ptr, i64 %val seq_cst ret i64 %r @@ -78,24 +78,24 @@ define i64 @test3(i64* %ptr, i64 %val) { define i64 @test4(i64* %ptr, i64 %val) { ; CHECK: test4: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: orr [[REG3:(r[0-9]?[02468])]], [[REG1]] ; CHECK: orr [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test4: -; 
CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw or i64* %ptr, i64 %val seq_cst ret i64 %r @@ -103,24 +103,24 @@ define i64 @test4(i64* %ptr, i64 %val) { define i64 @test5(i64* %ptr, i64 %val) { ; CHECK: test5: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: eor [[REG3:(r[0-9]?[02468])]], [[REG1]] ; CHECK: eor [[REG4:(r[0-9]?[13579])]], [[REG2]] ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test5: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]] ; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw xor i64* %ptr, i64 %val seq_cst ret i64 %r @@ -128,20 +128,20 @@ define i64 @test5(i64* %ptr, i64 %val) { define i64 @test6(i64* %ptr, i64 %val) { ; CHECK: test6: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test6: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst ret 
i64 %r @@ -149,7 +149,7 @@ define i64 @test6(i64* %ptr, i64 %val) { define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; CHECK: test7: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: cmp [[REG1]] ; CHECK: cmpeq [[REG2]] @@ -157,10 +157,10 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test7: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: cmp [[REG1]] ; CHECK-THUMB: it eq @@ -169,7 +169,7 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst ret i64 %r @@ -186,7 +186,7 @@ define i64 @test8(i64* %ptr) { ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test8: ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] @@ -197,7 +197,7 @@ define i64 @test8(i64* %ptr) { ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = load atomic i64* %ptr seq_cst, align 8 ret i64 %r @@ -207,20 +207,20 @@ define i64 @test8(i64* %ptr) { ; way to write it. 
define void @test9(i64* %ptr, i64 %val) { ; CHECK: test9: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test9: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} store atomic i64 %val, i64* %ptr seq_cst, align 8 ret void @@ -228,7 +228,7 @@ define void @test9(i64* %ptr, i64 %val) { define i64 @test10(i64* %ptr, i64 %val) { ; CHECK: test10: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] @@ -236,10 +236,10 @@ define i64 @test10(i64* %ptr, i64 %val) { ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test10: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] @@ -247,7 +247,7 @@ define i64 @test10(i64* %ptr, i64 %val) { ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw min i64* %ptr, i64 %val seq_cst ret i64 %r @@ -255,7 +255,7 @@ define i64 @test10(i64* %ptr, i64 %val) { define i64 @test11(i64* %ptr, i64 %val) { ; CHECK: test11: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], 
[[REG3:(r[0-9]?[02468])]] ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] @@ -263,11 +263,11 @@ define i64 @test11(i64* %ptr, i64 %val) { ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test11: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] @@ -275,7 +275,7 @@ define i64 @test11(i64* %ptr, i64 %val) { ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw umin i64* %ptr, i64 %val seq_cst ret i64 %r @@ -283,7 +283,7 @@ define i64 @test11(i64* %ptr, i64 %val) { define i64 @test12(i64* %ptr, i64 %val) { ; CHECK: test12: -; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] @@ -291,10 +291,10 @@ define i64 @test12(i64* %ptr, i64 %val) { ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test12: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] @@ -302,7 +302,7 @@ define i64 @test12(i64* %ptr, i64 %val) { ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw max i64* %ptr, i64 %val seq_cst ret i64 %r @@ -310,7 +310,7 @@ define i64 @test12(i64* %ptr, i64 %val) { define i64 @test13(i64* %ptr, i64 %val) { ; CHECK: test13: 
-; CHECK: dmb ish +; CHECK: dmb ishst ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]] ; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]] @@ -318,10 +318,10 @@ define i64 @test13(i64* %ptr, i64 %val) { ; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK: cmp ; CHECK: bne -; CHECK: dmb ish +; CHECK: dmb {{ish$}} ; CHECK-THUMB: test13: -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb ishst ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]] ; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]] @@ -329,7 +329,7 @@ define i64 @test13(i64* %ptr, i64 %val) { ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne -; CHECK-THUMB: dmb ish +; CHECK-THUMB: dmb {{ish$}} %r = atomicrmw umax i64* %ptr, i64 %val seq_cst ret i64 %r } diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll index 66916a7..7ae7129 100644 --- a/test/CodeGen/ARM/atomic-load-store.ll +++ b/test/CodeGen/ARM/atomic-load-store.ll @@ -6,15 +6,15 @@ define void @test1(i32* %ptr, i32 %val1) { ; ARM: test1 -; ARM: dmb ish +; ARM: dmb ishst ; ARM-NEXT: str -; ARM-NEXT: dmb ish +; ARM-NEXT: dmb {{ish$}} ; THUMBONE: test1 ; THUMBONE: __sync_lock_test_and_set_4 ; THUMBTWO: test1 -; THUMBTWO: dmb ish +; THUMBTWO: dmb ishst ; THUMBTWO-NEXT: str -; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: dmb {{ish$}} store atomic i32 %val1, i32* %ptr seq_cst, align 4 ret void } @@ -22,12 +22,12 @@ define void @test1(i32* %ptr, i32 %val1) { define i32 @test2(i32* %ptr) { ; ARM: test2 ; ARM: ldr -; ARM-NEXT: dmb ish +; ARM-NEXT: dmb {{ish$}} ; THUMBONE: test2 ; THUMBONE: __sync_val_compare_and_swap_4 ; THUMBTWO: test2 ; THUMBTWO: ldr -; THUMBTWO-NEXT: dmb ish +; THUMBTWO-NEXT: dmb {{ish$}} %val = load atomic i32* %ptr seq_cst, align 4 ret i32 %val } @@ -69,3 +69,37 @@ define 
void @test_old_store_64bit(i64* %p, i64 %v) { store atomic i64 %v, i64* %p seq_cst, align 8 ret void } + +; Release operations only need the store barrier provided by a "dmb ishst", + +define void @test_store_release(i32* %p, i32 %v) { +; ARM: test_store_release: +; ARM: dmb ishst +; THUMBTWO: test_store_release: +; THUMBTWO: dmb ishst + + store atomic i32 %v, i32* %p release, align 4 + ret void +} + +; However, if sequential consistency is needed *something* must ensure a release +; followed by an acquire does not get reordered. In that case a "dmb ishst" is +; not adequate. +define i32 @test_seq_cst(i32* %p, i32 %v) { +; ARM: test_seq_cst: +; ARM: dmb ishst +; ARM: str +; ARM: dmb {{ish$}} +; ARM: ldr +; ARM: dmb {{ish$}} + +; THUMBTWO: test_seq_cst: +; THUMBTWO: dmb ishst +; THUMBTWO: str +; THUMBTWO: dmb {{ish$}} +; THUMBTWO: ldr +; THUMBTWO: dmb {{ish$}} + store atomic i32 %v, i32* %p seq_cst, align 4 + %val = load atomic i32* %p seq_cst, align 4 + ret i32 %val +} |