diff options
author | Mon P Wang <wangmp@apple.com> | 2008-05-05 19:05:59 +0000 |
---|---|---|
committer | Mon P Wang <wangmp@apple.com> | 2008-05-05 19:05:59 +0000 |
commit | 078a62d580cd4e62ecb7940323a953d49a0e72d2 (patch) | |
tree | da7531194465dc141c0d26ff68d1989c462b2c89 /lib | |
parent | de22f24fe536c5b73a1661f39114cfd095dba104 (diff) | |
download | external_llvm-078a62d580cd4e62ecb7940323a953d49a0e72d2.zip external_llvm-078a62d580cd4e62ecb7940323a953d49a0e72d2.tar.gz external_llvm-078a62d580cd4e62ecb7940323a953d49a0e72d2.tar.bz2 |
Added additional atomic intrinsics: and, or, xor, min, and max.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@50663 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/CodeGen/MachineBasicBlock.cpp | 13 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 56 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 63 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/TargetSelectionDAG.td | 16 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 222 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 21 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 57 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 2 |
10 files changed, 421 insertions, 47 deletions
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 58ca6ef..01aaba5 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -252,6 +252,19 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { Predecessors.erase(I); } +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) +{ + if (this == fromMBB) + return; + + for(MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(), + end = fromMBB->succ_end(); iter != end; ++iter) { + addSuccessor(*iter); + } + while(!fromMBB->succ_empty()) + fromMBB->removeSuccessor(fromMBB->succ_begin()); +} + bool MachineBasicBlock::isSuccessor(MachineBasicBlock *MBB) const { std::vector<MachineBasicBlock *>::const_iterator I = std::find(Successors.begin(), Successors.end(), MBB); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f8e44ec..4aa38cd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1235,32 +1235,58 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { break; } - case ISD::ATOMIC_LCS: + case ISD::ATOMIC_LCS: { + unsigned int num_operands = 4; + assert(Node->getNumOperands() == num_operands && "Invalid Atomic node!"); + SDOperand Ops[4]; + for (unsigned int x = 0; x < num_operands; ++x) + Ops[x] = LegalizeOp(Node->getOperand(x)); + Result = DAG.UpdateNodeOperands(Result, &Ops[0], num_operands); + + switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Result = TLI.LowerOperation(Result, DAG); + break; + case TargetLowering::Legal: + break; + } + AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0)); + AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1)); + return Result.getValue(Op.ResNo); + } case ISD::ATOMIC_LAS: + case ISD::ATOMIC_LSS: + case ISD::ATOMIC_LOAD_AND: + case 
ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_SWAP: { - assert(((Node->getNumOperands() == 4 && Node->getOpcode() == ISD::ATOMIC_LCS) || - (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_LAS) || - (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_SWAP)) && - "Invalid Atomic node!"); - int num = Node->getOpcode() == ISD::ATOMIC_LCS ? 4 : 3; - SDOperand Ops[4]; - for (int x = 0; x < num; ++x) + unsigned int num_operands = 3; + assert(Node->getNumOperands() == num_operands && "Invalid Atomic node!"); + SDOperand Ops[3]; + for (unsigned int x = 0; x < num_operands; ++x) Ops[x] = LegalizeOp(Node->getOperand(x)); - Result = DAG.UpdateNodeOperands(Result, &Ops[0], num); + Result = DAG.UpdateNodeOperands(Result, &Ops[0], num_operands); switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { default: assert(0 && "This action is not supported yet!"); case TargetLowering::Custom: Result = TLI.LowerOperation(Result, DAG); break; + case TargetLowering::Expand: + Result = SDOperand(TLI.ExpandOperationResult(Op.Val, DAG),0); + break; case TargetLowering::Legal: break; } AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0)); AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1)); return Result.getValue(Op.ResNo); - } - + } case ISD::Constant: { ConstantSDNode *CN = cast<ConstantSDNode>(Node); unsigned opAction = @@ -4242,6 +4268,14 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) { break; } case ISD::ATOMIC_LAS: + case ISD::ATOMIC_LSS: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_SWAP: { Tmp2 = PromoteOp(Node->getOperand(2)); Result = DAG.getAtomic(Node->getOpcode(), Node->getOperand(0), diff --git 
a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6b2f543..26d5548 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2855,7 +2855,11 @@ SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain, SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain, SDOperand Ptr, SDOperand Val, MVT::ValueType VT) { - assert((Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_SWAP) + assert(( Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_LSS + || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_LOAD_AND + || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR + || Opcode == ISD::ATOMIC_LOAD_MIN || Opcode == ISD::ATOMIC_LOAD_MAX + || Opcode == ISD::ATOMIC_LOAD_UMIN || Opcode == ISD::ATOMIC_LOAD_UMAX) && "Invalid Atomic Op"); SDVTList VTs = getVTList(Val.getValueType(), MVT::Other); FoldingSetNodeID ID; @@ -4269,7 +4273,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_LCS: return "AtomicLCS"; case ISD::ATOMIC_LAS: return "AtomicLAS"; - case ISD::ATOMIC_SWAP: return "AtomicSWAP"; + case ISD::ATOMIC_LSS: return "AtomicLSS"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_SWAP: return "AtomicSWAP"; case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7fc5d61..15e3629 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ 
b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -732,6 +732,10 @@ public: assert(0 && "UserOp2 should not exist at instruction selection time!"); abort(); } + +private: + inline const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); + }; } // end namespace llvm @@ -2769,6 +2773,22 @@ static void addCatchInfo(CallInst &I, MachineModuleInfo *MMI, } } + +/// Inlined utility function to implement binary input atomic intrinsics for +// visitIntrinsicCall: I is a call instruction +// Op is the associated NodeType for I +const char * +SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { + SDOperand Root = getRoot(); + SDOperand O2 = getValue(I.getOperand(2)); + SDOperand L = DAG.getAtomic(Op, Root, + getValue(I.getOperand(1)), + O2, O2.getValueType()); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; +} + /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. 
@@ -3205,27 +3225,26 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.setRoot(L.getValue(1)); return 0; } - case Intrinsic::atomic_las: { - SDOperand Root = getRoot(); - SDOperand O2 = getValue(I.getOperand(2)); - SDOperand L = DAG.getAtomic(ISD::ATOMIC_LAS, Root, - getValue(I.getOperand(1)), - O2, O2.getValueType()); - setValue(&I, L); - DAG.setRoot(L.getValue(1)); - return 0; - } - case Intrinsic::atomic_swap: { - SDOperand Root = getRoot(); - SDOperand O2 = getValue(I.getOperand(2)); - SDOperand L = DAG.getAtomic(ISD::ATOMIC_SWAP, Root, - getValue(I.getOperand(1)), - O2, O2.getValueType()); - setValue(&I, L); - DAG.setRoot(L.getValue(1)); - return 0; - } - + case Intrinsic::atomic_las: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LAS); + case Intrinsic::atomic_lss: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LSS); + case Intrinsic::atomic_load_and: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND); + case Intrinsic::atomic_load_or: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR); + case Intrinsic::atomic_load_xor: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR); + case Intrinsic::atomic_load_min: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN); + case Intrinsic::atomic_load_max: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX); + case Intrinsic::atomic_load_umin: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN); + case Intrinsic::atomic_load_umax: + return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); + case Intrinsic::atomic_swap: + return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); } } @@ -4519,8 +4538,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } - - bool SelectionDAGISel::runOnFunction(Function &Fn) { // Get alias analysis for load/store combining. 
AA = &getAnalysis<AliasAnalysis>(); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5ccad9e..371fbab 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -165,7 +165,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { TargetLowering::TargetLowering(TargetMachine &tm) : TM(tm), TD(TM.getTargetData()) { - assert(ISD::BUILTIN_OP_END <= 156 && + assert(ISD::BUILTIN_OP_END <= OpActionsCapacity && "Fixed size array in TargetLowering is not large enough!"); // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); diff --git a/lib/Target/TargetSelectionDAG.td b/lib/Target/TargetSelectionDAG.td index d70cc75..209cda0 100644 --- a/lib/Target/TargetSelectionDAG.td +++ b/lib/Target/TargetSelectionDAG.td @@ -358,6 +358,22 @@ def atomic_las : SDNode<"ISD::ATOMIC_LAS" , STDAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_lss : SDNode<"ISD::ATOMIC_LSS" , STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; // Do not use ld, st directly. 
Use load, extload, sextload, zextload, store, // and truncst (see below). diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e90c930..2df8af5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -292,10 +292,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) if (!Subtarget->hasSSE2()) setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + // Expand certain atomics setOperationAction(ISD::ATOMIC_LCS , MVT::i8, Custom); setOperationAction(ISD::ATOMIC_LCS , MVT::i16, Custom); setOperationAction(ISD::ATOMIC_LCS , MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LCS , MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LSS , MVT::i32, Expand); // Use the default ISD::LOCATION, ISD::DECLARE expansion. setOperationAction(ISD::LOCATION, MVT::Other, Expand); @@ -5511,6 +5513,15 @@ SDNode* X86TargetLowering::ExpandATOMIC_LCS(SDNode* Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, Tys, ResultVal, cpOutH.getValue(1)).Val; } +SDNode* X86TargetLowering::ExpandATOMIC_LSS(SDNode* Op, SelectionDAG &DAG) { + MVT::ValueType T = cast<AtomicSDNode>(Op)->getVT(); + assert (T == MVT::i32 && "Only know how to expand i32 LSS"); + SDOperand negOp = DAG.getNode(ISD::SUB, T, + DAG.getConstant(0, T), Op->getOperand(2)); + return DAG.getAtomic(ISD::ATOMIC_LAS, Op->getOperand(0), + Op->getOperand(1), negOp, T).Val; +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
/// SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { @@ -5568,6 +5579,7 @@ SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { case ISD::FP_TO_SINT: return ExpandFP_TO_SINT(N, DAG); case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG); case ISD::ATOMIC_LCS: return ExpandATOMIC_LCS(N, DAG); + case ISD::ATOMIC_LSS: return ExpandATOMIC_LSS(N,DAG); } } @@ -5732,6 +5744,187 @@ X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps, // X86 Scheduler Hooks //===----------------------------------------------------------------------===// +// private utility function +MachineBasicBlock * +X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, + MachineBasicBlock *MBB, + unsigned regOpc, + unsigned immOpc) { + // For the atomic bitwise operator, we generate + // thisMBB: + // newMBB: + // ld EAX = [bitinstr.addr] + // mov t1 = EAX + // op t2 = t1, [bitinstr.val] + // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // bz newMBB + // fallthrough -->nextMBB + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + ilist<MachineBasicBlock>::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB); + F->getBasicBlockList().insert(MBBIter, newMBB); + F->getBasicBlockList().insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to itself and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + // Insert instructions into newMBB based on incoming instruction + assert(bInstr->getNumOperands() < 8 && 
"unexpected number of operands"); + MachineOperand& destOper = bInstr->getOperand(0); + MachineOperand* argOpers[6]; + int numArgs = bInstr->getNumOperands() - 1; + for (int i=0; i < numArgs; ++i) + argOpers[i] = &bInstr->getOperand(i+1); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = 3; // [0,3] + int valArgIndx = 4; + + MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1); + MIB.addReg(X86::EAX); + + unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) + && "invalid operand"); + if (argOpers[valArgIndx]->isReg()) + MIB = BuildMI(newMBB, TII->get(regOpc), t2); + else + MIB = BuildMI(newMBB, TII->get(immOpc), t2); + MIB.addReg(t1); + (*MIB).addOperand(*argOpers[valArgIndx]); + + MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + MIB.addReg(t2); + + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg()); + MIB.addReg(X86::EAX); + + // insert branch + BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); + + delete bInstr; // The pseudo instruction is gone now. 
+ return nextMBB; +} + +// private utility function +MachineBasicBlock * +X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, + MachineBasicBlock *MBB, + unsigned cmovOpc) { + // For the atomic min/max operator, we generate + // thisMBB: + // newMBB: + // ld EAX = [min/max.addr] + // mov t1 = EAX + // mov t2 = [min/max.val] + // cmp t1, t2 + // cmov[cond] t2 = t1 + // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // bz newMBB + // fallthrough -->nextMBB + // + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + ilist<MachineBasicBlock>::iterator MBBIter = MBB; + ++MBBIter; + + /// First build the CFG + MachineFunction *F = MBB->getParent(); + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB); + MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB); + F->getBasicBlockList().insert(MBBIter, newMBB); + F->getBasicBlockList().insert(MBBIter, nextMBB); + + // Move all successors to thisMBB to nextMBB + nextMBB->transferSuccessors(thisMBB); + + // Update thisMBB to fall through to newMBB + thisMBB->addSuccessor(newMBB); + + // newMBB jumps to newMBB and fall through to nextMBB + newMBB->addSuccessor(nextMBB); + newMBB->addSuccessor(newMBB); + + // Insert instructions into newMBB based on incoming instruction + assert(mInstr->getNumOperands() < 8 && "unexpected number of operands"); + MachineOperand& destOper = mInstr->getOperand(0); + MachineOperand* argOpers[6]; + int numArgs = mInstr->getNumOperands() - 1; + for (int i=0; i < numArgs; ++i) + argOpers[i] = &mInstr->getOperand(i+1); + + // x86 address has 4 operands: base, index, scale, and displacement + int lastAddrIndx = 3; // [0,3] + int valArgIndx = 4; + + MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + + unsigned t1 = 
F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1); + MIB.addReg(X86::EAX); + + // We only support register and immediate values + assert( (argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) + && "invalid operand"); + + unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + if (argOpers[valArgIndx]->isReg()) + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2); + else + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2); + (*MIB).addOperand(*argOpers[valArgIndx]); + + MIB = BuildMI(newMBB, TII->get(X86::CMP32rr)); + MIB.addReg(t1); + MIB.addReg(t2); + + // Generate movc + unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); + MIB = BuildMI(newMBB, TII->get(cmovOpc),t3); + MIB.addReg(t2); + MIB.addReg(t1); + + // Cmp and exchange if none has modified the memory location + MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32)); + for (int i=0; i <= lastAddrIndx; ++i) + (*MIB).addOperand(*argOpers[i]); + MIB.addReg(t3); + + MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg()); + MIB.addReg(X86::EAX); + + // insert branch + BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB); + + delete mInstr; // The pseudo instruction is gone now. + return nextMBB; +} + + MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) { @@ -5766,15 +5959,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); F->getBasicBlockList().insert(It, copy0MBB); F->getBasicBlockList().insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current + // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. 
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - sinkMBB->addSuccessor(*i); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while(!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + sinkMBB->transferSuccessors(BB); + + // Add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -5874,6 +6063,23 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, delete MI; // The pseudo instruction is gone now. return BB; } + case X86::ATOMAND32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, + X86::AND32ri); + case X86::ATOMOR32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, + X86::OR32ri); + case X86::ATOMXOR32: + return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, + X86::XOR32ri); + case X86::ATOMMIN32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); + case X86::ATOMMAX32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); + case X86::ATOMUMIN32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); + case X86::ATOMUMAX32: + return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 55ad70e..2879039 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -346,6 +346,7 @@ namespace llvm { virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB); + /// getTargetNodeName - This method returns the name of a target specific /// DAG node. 
virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -524,7 +525,8 @@ namespace llvm { SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG); SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG); SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG); - + SDNode *ExpandATOMIC_LSS(SDNode *N, SelectionDAG &DAG); + SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG, SDOperand Chain, SDOperand Dst, SDOperand Src, @@ -537,6 +539,23 @@ namespace llvm { bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); + + /// Utility function to emit atomic bitwise operations (and, or, xor). + // It takes the bitwise instruction to expand, the associated machine basic + // block, and the associated X86 opcodes for reg/reg and reg/imm. + MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( + MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned regOpc, + unsigned immOpc); + + /// Utility function to emit atomic min and max. It takes the min/max + // instruction to expand, the associated basic block, and the associated + // cmov opcode for moving the min or max value. 
+ MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr, + MachineBasicBlock *BB, + unsigned cmovOpc); + }; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index c539eca..af61540 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2598,6 +2598,63 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), TB, LOCK; } +// Atomic exchange and and, or, xor +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMAND32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMAND32 PSUEDO!", + [(set GR32:$dst, (atomic_load_and addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMOR32 : I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMOR32 PSUEDO!", + [(set GR32:$dst, (atomic_load_or addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMXOR32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMXOR32 PSUEDO!", + [(set GR32:$dst, (atomic_load_xor addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMMIN32: I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), + "#ATOMMIN32 PSUEDO!", + [(set GR32:$dst, (atomic_load_min addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMMAX32: I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMMAX32 PSUEDO!", + [(set GR32:$dst, (atomic_load_max addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMUMIN32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, 
GR32:$val), + "#ATOMUMIN32 PSUEDO!", + [(set GR32:$dst, (atomic_load_umin addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + +let Constraints = "$val = $dst", Defs = [EFLAGS], + usesCustomDAGSchedInserter = 1 in { +def ATOMUMAX32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), + "#ATOMUMAX32 PSUEDO!", + [(set GR32:$dst, (atomic_load_umax addr:$ptr, GR32:$val))]>, + TB, LOCK; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 2dd7b45..c004077 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -247,7 +247,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , MaxInlineSizeThreshold(128) , Is64Bit(is64Bit) , TargetType(isELF) { // Default to ELF unless otherwise specified. - + // Determine default and user specified characteristics if (!FS.empty()) { // If feature string is not empty, parse features string. |