aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docs/LangRef.html106
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h8
-rw-r--r--include/llvm/Intrinsics.td5
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp10
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp1
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp9
-rw-r--r--lib/Target/Alpha/AlphaInstrFormats.td8
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.td9
-rw-r--r--lib/Target/TargetSelectionDAG.td7
-rw-r--r--lib/Target/X86/README.txt7
-rw-r--r--lib/Target/X86/X86InstrSSE.td8
-rw-r--r--test/CodeGen/Alpha/mb.ll8
-rw-r--r--test/CodeGen/Alpha/wmb.ll8
-rw-r--r--test/CodeGen/X86/lfence.ll8
-rw-r--r--test/CodeGen/X86/mfence.ll20
-rw-r--r--test/CodeGen/X86/nofence.ll27
-rw-r--r--test/CodeGen/X86/sfence.ll8
17 files changed, 256 insertions, 1 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html
index d9d5ca8..6267cf8 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -204,6 +204,11 @@
<li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
</ol>
</li>
+ <li><a href="#int_atomics">Atomic intrinsics</a>
+ <ol>
+ <li><a href="#int_memory_barrier">'<tt>llvm.memory.barrier</tt>' Intrinsic</a></li>
+ </ol>
+ </li>
<li><a href="#int_general">General intrinsics</a>
<ol>
<li><a href="#int_var_annotation">
@@ -5234,6 +5239,107 @@ declare i8* @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;n
<!-- ======================================================================= -->
<div class="doc_subsection">
+ <a name="int_atomics">Atomic Operations and Synchronization Intrinsics</a>
+</div>
+
+<div class="doc_text">
+<p>
+ These intrinsic functions expand the "universal IR" of LLVM to represent
+ hardware constructs for atomic operations and memory synchronization. This
+ provides an interface to the hardware, not an interface to the programmer. It
+ is aimed at a low enough level to allow any programming models or APIs which
+ need atomic behaviors to map cleanly onto it. It is also modeled primarily on
+ hardware behavior. Just as hardware provides a "universal IR" for source
+ languages, it also provides a starting point for developing a "universal"
+ atomic operation and synchronization IR.
+</p>
+<p>
+ These do <em>not</em> form an API such as high-level threading libraries,
+ software transactional memory systems, atomic primitives, and intrinsic
+ functions as found in BSD, GNU libc, atomic_ops, APR, and other system and
+ application libraries. The hardware interface provided by LLVM should allow
+ a clean implementation of all of these APIs and parallel programming models.
+ No one model or paradigm should be selected above others unless the hardware
+ itself ubiquitously does so.
+
+</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<div class="doc_subsubsection">
+ <a name="int_memory_barrier">'<tt>llvm.memory.barrier</tt>' Intrinsic</a>
+</div>
+<div class="doc_text">
+<h5>Syntax:</h5>
+<pre>
+declare void @llvm.memory.barrier( i1 &lt;ll&gt;, i1 &lt;ls&gt;, i1 &lt;sl&gt;, i1 &lt;ss&gt;,
+i1 &lt;device&gt; )
+
+</pre>
+<h5>Overview:</h5>
+<p>
+ The <tt>llvm.memory.barrier</tt> intrinsic guarantees ordering between
+ specific pairs of memory access types.
+</p>
+<h5>Arguments:</h5>
+<p>
+ The <tt>llvm.memory.barrier</tt> intrinsic requires five boolean arguments.
+ Each of the first four arguments enables a specific barrier as listed below. The
+ fifth argument specifies that the barrier applies to I/O, device, or uncached memory.
+
+</p>
+ <ul>
+ <li><tt>ll</tt>: load-load barrier</li>
+ <li><tt>ls</tt>: load-store barrier</li>
+ <li><tt>sl</tt>: store-load barrier</li>
+ <li><tt>ss</tt>: store-store barrier</li>
+ <li><tt>device</tt>: barrier applies to device and uncached memory also.</li>
+ </ul>
+<h5>Semantics:</h5>
+<p>
+ This intrinsic causes the system to enforce some ordering constraints upon
+ the loads and stores of the program. This barrier does not indicate
+ <em>when</em> any events will occur, it only enforces an <em>order</em> in
+ which they occur. For any of the specified pairs of load and store operations
+ (e.g. load-load or store-load), all of the first operations preceding the
+ barrier will complete before any of the second operations succeeding the
+ barrier begin. Specifically the semantics for each pairing is as follows:
+</p>
+ <ul>
+ <li><tt>ll</tt>: All loads before the barrier must complete before any load
+ after the barrier begins.</li>
+
+ <li><tt>ls</tt>: All loads before the barrier must complete before any
+ store after the barrier begins.</li>
+ <li><tt>ss</tt>: All stores before the barrier must complete before any
+ store after the barrier begins.</li>
+ <li><tt>sl</tt>: All stores before the barrier must complete before any
+ load after the barrier begins.</li>
+ </ul>
+<p>
+ These semantics are applied with a logical "and" behavior when more than one
+ is enabled in a single memory barrier intrinsic.
+</p>
+<p>
+ Backends may implement stronger barriers than those requested when they do not
+ support as fine grained a barrier as requested. Some architectures do not
+ need all types of barriers and on such architectures, these become noops.
+</p>
+<h5>Example:</h5>
+<pre>
+%ptr = malloc i32
+ store i32 4, i32* %ptr
+
+%result1 = load i32* %ptr <i>; yields {i32}:result1 = 4</i>
+ call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 false, i1 false )
+ <i>; guarantee the above finishes</i>
+ store i32 8, i32* %ptr <i>; before this begins</i>
+</pre>
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
<a name="int_general">General Intrinsics</a>
</div>
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 4f56b9b..415e291 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -589,6 +589,14 @@ namespace ISD {
// TRAP - Trapping instruction
TRAP,
+ // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
+ // store-store, device)
+ // This corresponds to the llvm.memory.barrier intrinsic.
+ // It takes an input chain, 4 operands to specify the type of barrier, an
+ // operand specifying if the barrier applies to device and uncached memory
+ // and produces an output chain.
+ MEMBARRIER,
+
// BUILTIN_OP_END - This must be the last enum value in this list.
BUILTIN_OP_END
};
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 703b794..64b3abf 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -262,6 +262,11 @@ def int_init_trampoline : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty,
llvm_ptr_ty], []>,
GCCBuiltin<"__builtin_init_trampoline">;
+//===------------------------- Atomic Intrinsics --------------------------===//
+//
+def int_memory_barrier : Intrinsic<[llvm_void_ty, llvm_i1_ty, llvm_i1_ty,
+ llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>;
+
//===-------------------------- Other Intrinsics --------------------------===//
//
def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b505918..0d6a47c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1133,6 +1133,16 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
}
break;
+ case ISD::MEMBARRIER: {
+ assert(Node->getNumOperands() == 6 && "Invalid MemBarrier node!");
+ SDOperand Ops[6];
+ Ops[0] = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
+ for (int x = 1; x < 6; ++x)
+ Ops[x] = PromoteOp(Node->getOperand(x));
+ Result = DAG.UpdateNodeOperands(Result, &Ops[0], 6);
+ break;
+ }
+
case ISD::Constant: {
ConstantSDNode *CN = cast<ConstantSDNode>(Node);
unsigned opAction =
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f8e1fea..c12c98b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3792,6 +3792,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
return "<<Unknown Target Node>>";
}
+ case ISD::MEMBARRIER: return "MemBarrier";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::SRCVALUE: return "SrcValue";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b3d7fbf..7b01d63 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3046,6 +3046,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getNode(ISD::TRAP, MVT::Other, getRoot()));
return 0;
}
+ case Intrinsic::memory_barrier: {
+ SDOperand Ops[6];
+ Ops[0] = getRoot();
+ for (int x = 1; x < 6; ++x)
+ Ops[x] = getValue(I.getOperand(x));
+
+ DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, MVT::Other, &Ops[0], 6));
+ return 0;
+ }
}
}
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
index 366aea8..6eb59e0 100644
--- a/lib/Target/Alpha/AlphaInstrFormats.td
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -62,6 +62,14 @@ class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
let Inst{20-16} = 0;
let Inst{15-0} = fc;
}
+class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
+ : InstAlpha<opcode, asmstr, itin> {
+ let OutOperandList = (ops);
+ let InOperandList = (ops);
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = fc;
+}
class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
: InstAlpha<opcode, asmstr, itin> {
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 474180f..6274a3e 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -568,8 +568,14 @@ def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
(LDQl texternalsym:$ext, GPRC:$RB)>;
-
def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
+def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
+def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
+
+// WMB orders stores only, so select it solely for a pure store-store barrier; all else uses MB.
+def : Pat<(membarrier (i64 0), (i64 0), (i64 0), (i64 1), (i64 imm:$dev)), (WMB)>;
+def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
+ (MB)>;
//Basic Floating point ops
@@ -959,6 +965,7 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
//S_floating : IEEE Single
//T_floating : IEEE Double
+
//Unused instructions
//Mnemonic Format Opcode Description
//CALL_PAL Pcd 00 Trap to PALcode
diff --git a/lib/Target/TargetSelectionDAG.td b/lib/Target/TargetSelectionDAG.td
index 21bdb5c..eeed994 100644
--- a/lib/Target/TargetSelectionDAG.td
+++ b/lib/Target/TargetSelectionDAG.td
@@ -185,6 +185,11 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert
SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
]>;
+def STDMemBarrier : SDTypeProfile<0, 5, [
+ SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
+ SDTCisInt<0>
+]>;
+
class SDCallSeqStart<list<SDTypeConstraint> constraints> :
SDTypeProfile<0, 1, constraints>;
class SDCallSeqEnd<list<SDTypeConstraint> constraints> :
@@ -329,6 +334,8 @@ def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>;
def ret : SDNode<"ISD::RET" , SDTNone, [SDNPHasChain]>;
def trap : SDNode<"ISD::TRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
+def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier,
+ [SDNPHasChain, SDNPSideEffect]>;
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
// and truncst (see below).
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 5a4f7c4..846d694 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1509,3 +1509,10 @@ void test(double *P) {
}
//===---------------------------------------------------------------------===//
+
+handling llvm.memory.barrier on pre SSE2 cpus
+
+should generate:
+lock ; orl $0, (%esp)
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 183ee2c..3d225ee 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2149,6 +2149,14 @@ def LFENCE : I<0xAE, MRM5m, (outs), (ins),
def MFENCE : I<0xAE, MRM6m, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+//TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+ (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+ (i8 1)), (MFENCE)>;
+
// Alias instructions that map zero vector to pxor / xorp* for sse.
let isReMaterializable = 1 in
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins),
diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll
new file mode 100644
index 0000000..50c245f
--- /dev/null
+++ b/test/CodeGen/Alpha/mb.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=alpha | grep mb
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 true)
+ ret void
+}
diff --git a/test/CodeGen/Alpha/wmb.ll b/test/CodeGen/Alpha/wmb.ll
new file mode 100644
index 0000000..f745cd5
--- /dev/null
+++ b/test/CodeGen/Alpha/wmb.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=alpha | grep wmb
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true , i1 true)
+ ret void
+}
diff --git a/test/CodeGen/X86/lfence.ll b/test/CodeGen/X86/lfence.ll
new file mode 100644
index 0000000..0721d73
--- /dev/null
+++ b/test/CodeGen/X86/lfence.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep lfence
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 true)
+ ret void
+}
diff --git a/test/CodeGen/X86/mfence.ll b/test/CodeGen/X86/mfence.ll
new file mode 100644
index 0000000..6abdbce
--- /dev/null
+++ b/test/CodeGen/X86/mfence.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep mfence
+
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 true)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 true)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 true)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 true)
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 true)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 true)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 true)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 true)
+ ret void
+}
diff --git a/test/CodeGen/X86/nofence.ll b/test/CodeGen/X86/nofence.ll
new file mode 100644
index 0000000..132ac94
--- /dev/null
+++ b/test/CodeGen/X86/nofence.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep fence
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 true, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 true, i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 true, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 true, i1 true, i1 true, i1 false)
+
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false)
+ ret void
+}
diff --git a/test/CodeGen/X86/sfence.ll b/test/CodeGen/X86/sfence.ll
new file mode 100644
index 0000000..fc75ccb
--- /dev/null
+++ b/test/CodeGen/X86/sfence.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep sfence
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true)
+ ret void
+}