diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 81 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrCompiler.td | 3 |
2 files changed, 83 insertions, 1 deletions
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4534e85..de7aaff 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -189,6 +189,7 @@ namespace {
     SDNode *Select(SDNode *N);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
     SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
+    SDNode *SelectAtomicLoadOr(SDNode *Node, EVT NVT);
 
     bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
@@ -1329,6 +1330,8 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   return ResNode;
 }
 
+// FIXME: Figure out some way to unify this with the 'or' and other code
+// below.
 SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
   if (Node->hasAnyUseOfValue(0))
     return 0;
@@ -1479,6 +1482,78 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
   }
 }
 
+SDNode *X86DAGToDAGISel::SelectAtomicLoadOr(SDNode *Node, EVT NVT) {
+  if (Node->hasAnyUseOfValue(0))
+    return 0;
+
+  // Optimize __sync_or_and_fetch when its result is unused (checked above)
+  // by emitting the "lock" version of the or instruction. Returns 0, leaving
+  // Node to the caller, if the address, type or immediate can't be handled.
+  // FIXME: Same as for 'add' and 'sub'.
+  SDValue Chain = Node->getOperand(0);
+  SDValue Ptr = Node->getOperand(1);
+  SDValue Val = Node->getOperand(2);
+  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+    return 0;
+
+  bool isCN = false;
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+  if (CN) {
+    isCN = true;  // Constant operand: select an immediate (mi*) form below.
+    Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+  }
+
+  unsigned Opc = 0;
+  switch (NVT.getSimpleVT().SimpleTy) {
+    default: return 0;
+    case MVT::i8:
+      if (isCN)
+        Opc = X86::LOCK_OR8mi;
+      else
+        Opc = X86::LOCK_OR8mr;
+      break;
+    case MVT::i16:
+      if (isCN) {
+        if (immSext8(Val.getNode()))
+          Opc = X86::LOCK_OR16mi8;
+        else
+          Opc = X86::LOCK_OR16mi;
+      } else
+        Opc = X86::LOCK_OR16mr;
+      break;
+    case MVT::i32:
+      if (isCN) {
+        if (immSext8(Val.getNode()))
+          Opc = X86::LOCK_OR32mi8;
+        else
+          Opc = X86::LOCK_OR32mi;
+      } else
+        Opc = X86::LOCK_OR32mr;
+      break;
+    case MVT::i64:
+      if (!isCN)
+        Opc = X86::LOCK_OR64mr;
+      else if (immSext8(Val.getNode()))
+        Opc = X86::LOCK_OR64mi8;
+      else if (i64immSExt32(Val.getNode()))
+        Opc = X86::LOCK_OR64mi32;
+      else return 0;  // Fits neither imm form; never emit with Opc == 0.
+      break;
+  }
+
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                 dl, NVT), 0);
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+  SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+  cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+  SDValue RetVals[] = { Undef, Ret };  // Value 0 has no uses (checked above).
+  return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
 /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
 /// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) { @@ -1580,6 +1655,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return RetVal; break; } + case ISD::ATOMIC_LOAD_OR: { + SDNode *RetVal = SelectAtomicLoadOr(Node, NVT); + if (RetVal) + return RetVal; + break; + } case ISD::AND: case ISD::OR: case ISD::XOR: { diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 662fddf..31b33ab 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -580,7 +580,7 @@ def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{b}\t", - "{$src2, $dst|$dst, $src2}"), + "{$src2, $dst|$dst, $src2}"), []>, LOCK; def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, @@ -629,6 +629,7 @@ def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">; defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">; +defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">; // Optimized codegen when the non-memory output is not used. let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { |