aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h25
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h8
-rw-r--r--include/llvm/Target/TargetLowering.h63
-rw-r--r--include/llvm/Target/TargetSubtarget.h3
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp118
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp45
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h1
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp369
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp282
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp55
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp54
-rw-r--r--lib/Target/ARM/ARMISelLowering.h15
-rw-r--r--lib/Target/ARM/ARMSubtarget.h2
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp4
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp3
-rw-r--r--lib/Target/IA64/IA64ISelLowering.cpp3
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp3
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp10
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp3
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp271
-rw-r--r--lib/Target/X86/X86ISelLowering.h20
-rw-r--r--test/CodeGen/X86/2004-02-12-Memcpy.llx3
-rw-r--r--test/CodeGen/X86/byval2.ll4
-rw-r--r--test/CodeGen/X86/byval3.ll6
-rw-r--r--test/CodeGen/X86/byval4.ll10
-rw-r--r--test/CodeGen/X86/byval5.ll18
-rw-r--r--test/CodeGen/X86/byval7.ll3
-rw-r--r--test/CodeGen/X86/small-byval-memcpy.ll22
-rw-r--r--test/CodeGen/X86/variable-sized-darwin-bzero.ll8
31 files changed, 687 insertions, 750 deletions
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 2b8d678..8ea14a5 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -323,17 +323,20 @@ public:
SDOperand getNode(unsigned Opcode, SDVTList VTs,
const SDOperand *Ops, unsigned NumOps);
- SDOperand getMemcpy(SDOperand Chain, SDOperand Dest, SDOperand Src,
- SDOperand Size, SDOperand Align,
- SDOperand AlwaysInline);
-
- SDOperand getMemmove(SDOperand Chain, SDOperand Dest, SDOperand Src,
- SDOperand Size, SDOperand Align,
- SDOperand AlwaysInline);
-
- SDOperand getMemset(SDOperand Chain, SDOperand Dest, SDOperand Src,
- SDOperand Size, SDOperand Align,
- SDOperand AlwaysInline);
+ SDOperand getMemcpy(SDOperand Chain, SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff);
+
+ SDOperand getMemmove(SDOperand Chain, SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff);
+
+ SDOperand getMemset(SDOperand Chain, SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ Value *DstSV, uint64_t DstOff);
/// getSetCC - Helper function to make it easier to build SetCC's if you just
/// have an ISD::CondCode instead of an SDOperand.
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 6b2b857..deded1a 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -497,14 +497,6 @@ namespace ISD {
// it returns an output chain.
STACKRESTORE,
- // MEMSET/MEMCPY/MEMMOVE - The first operand is the chain. The following
- // correspond to the operands of the LLVM intrinsic functions and the last
- // one is AlwaysInline. The only result is a token chain. The alignment
- // argument is guaranteed to be a Constant node.
- MEMSET,
- MEMMOVE,
- MEMCPY,
-
// CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of
// a call sequence, and carry arbitrary information that target might want
// to know. The first operand is a chain, the rest are specified by the
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 719f719..16f9ed6 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -948,18 +948,61 @@ public:
SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG);
- virtual SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
- virtual SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest,
- SDOperand Source, SDOperand Count,
- SelectionDAG &DAG);
- virtual SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest,
- SDOperand Source, unsigned Size,
- unsigned Align, SelectionDAG &DAG) {
- assert(0 && "Not Implemented");
- return SDOperand(); // this is here to silence compiler errors
+ /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a
+ /// memcpy. This can be used by targets to provide code sequences for cases
+ /// that don't fit the target's parameters for simple loads/stores and can be
+ /// more efficient than using a library call. This function can return a null
+ /// SDOperand if the target declines to use inline code and a different
+ /// lowering strategy should be used.
+ ///
+ /// If AlwaysInline is true, the size is constant and the target should not
+ /// emit any calls and is strongly encouraged to attempt to emit inline code
+ /// even if it is beyond the usual threshold because this intrinsic is being
+ /// expanded in a place where calls are not feasible (e.g. within the prologue
+ /// for another call). If the target chooses to decline an AlwaysInline
+ /// request here, legalize will resort to using simple loads and stores.
+ virtual SDOperand
+ EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Op1, SDOperand Op2,
+ SDOperand Op3, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+ return SDOperand();
+ }
+
+ /// EmitTargetCodeForMemmove - Emit target-specific code that performs a
+ /// memmove. This can be used by targets to provide code sequences for cases
+ /// that don't fit the target's parameters for simple loads/stores and can be
+ /// more efficient than using a library call. This function can return a null
+ /// SDOperand if the target declines to use code and a different lowering
+ /// strategy should be used.
+ virtual SDOperand
+ EmitTargetCodeForMemmove(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Op1, SDOperand Op2,
+ SDOperand Op3, unsigned Align,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+ return SDOperand();
+ }
+
+ /// EmitTargetCodeForMemset - Emit target-specific code that performs a
+ /// memset. This can be used by targets to provide code sequences for cases
+ /// that don't fit the target's parameters for simple stores and can be more
+ /// efficient than using a library call. This function can return a null
+ /// SDOperand if the target declines to use code and a different lowering
+ /// strategy should be used.
+ virtual SDOperand
+ EmitTargetCodeForMemset(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Op1, SDOperand Op2,
+ SDOperand Op3, unsigned Align,
+ Value *DstSV, uint64_t DstOff) {
+ return SDOperand();
}
-
/// LowerOperation - This callback is invoked for operations that are
/// unsupported by the target, which are registered to use 'custom' lowering,
/// and whose defined values are all legal.
diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h
index 1096b16..fde8f44 100644
--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtarget.h
@@ -28,9 +28,6 @@ class TargetSubtarget {
protected: // Can only create subclasses...
TargetSubtarget();
public:
- /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
- /// that still makes it profitable to inline the call.
- virtual unsigned getMaxInlineSizeThreshold() const {return 0; }
virtual ~TargetSubtarget();
};
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 5cb13e3..2df363e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -22,6 +22,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -2842,123 +2843,6 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
break;
}
break;
- case ISD::MEMSET:
- case ISD::MEMCPY:
- case ISD::MEMMOVE: {
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Chain
- Tmp2 = LegalizeOp(Node->getOperand(1)); // Pointer
-
- if (Node->getOpcode() == ISD::MEMSET) { // memset = ubyte
- switch (getTypeAction(Node->getOperand(2).getValueType())) {
- case Expand: assert(0 && "Cannot expand a byte!");
- case Legal:
- Tmp3 = LegalizeOp(Node->getOperand(2));
- break;
- case Promote:
- Tmp3 = PromoteOp(Node->getOperand(2));
- break;
- }
- } else {
- Tmp3 = LegalizeOp(Node->getOperand(2)); // memcpy/move = pointer,
- }
-
- SDOperand Tmp4;
- switch (getTypeAction(Node->getOperand(3).getValueType())) {
- case Expand: {
- // Length is too big, just take the lo-part of the length.
- SDOperand HiPart;
- ExpandOp(Node->getOperand(3), Tmp4, HiPart);
- break;
- }
- case Legal:
- Tmp4 = LegalizeOp(Node->getOperand(3));
- break;
- case Promote:
- Tmp4 = PromoteOp(Node->getOperand(3));
- break;
- }
-
- SDOperand Tmp5;
- switch (getTypeAction(Node->getOperand(4).getValueType())) { // uint
- case Expand: assert(0 && "Cannot expand this yet!");
- case Legal:
- Tmp5 = LegalizeOp(Node->getOperand(4));
- break;
- case Promote:
- Tmp5 = PromoteOp(Node->getOperand(4));
- break;
- }
-
- SDOperand Tmp6;
- switch (getTypeAction(Node->getOperand(5).getValueType())) { // bool
- case Expand: assert(0 && "Cannot expand this yet!");
- case Legal:
- Tmp6 = LegalizeOp(Node->getOperand(5));
- break;
- case Promote:
- Tmp6 = PromoteOp(Node->getOperand(5));
- break;
- }
-
- switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
- default: assert(0 && "This action not implemented for this operation!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal: {
- SDOperand Ops[] = { Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6 };
- Result = DAG.UpdateNodeOperands(Result, Ops, 6);
- if (isCustom) {
- Tmp1 = TLI.LowerOperation(Result, DAG);
- if (Tmp1.Val) Result = Tmp1;
- }
- break;
- }
- case TargetLowering::Expand: {
- // Otherwise, the target does not support this operation. Lower the
- // operation to an explicit libcall as appropriate.
- MVT::ValueType IntPtr = TLI.getPointerTy();
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
-
- const char *FnName = 0;
- if (Node->getOpcode() == ISD::MEMSET) {
- Entry.Node = Tmp2; Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- // Extend the (previously legalized) ubyte argument to be an int value
- // for the call.
- if (Tmp3.getValueType() > MVT::i32)
- Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3);
- else
- Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3);
- Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
- Args.push_back(Entry);
- Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt = false;
- Args.push_back(Entry);
-
- FnName = "memset";
- } else if (Node->getOpcode() == ISD::MEMCPY ||
- Node->getOpcode() == ISD::MEMMOVE) {
- Entry.Ty = IntPtrTy;
- Entry.Node = Tmp2; Args.push_back(Entry);
- Entry.Node = Tmp3; Args.push_back(Entry);
- Entry.Node = Tmp4; Args.push_back(Entry);
- FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" : "memcpy";
- } else {
- assert(0 && "Unknown op!");
- }
-
- std::pair<SDOperand,SDOperand> CallResult =
- TLI.LowerCallTo(Tmp1, Type::VoidTy,
- false, false, false, CallingConv::C, false,
- DAG.getExternalSymbol(FnName, IntPtr), Args, DAG);
- Result = CallResult.second;
- break;
- }
- }
- break;
- }
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6511cff..380c422 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -439,51 +439,6 @@ SDOperand DAGTypeLegalizer::CreateStackStoreLoad(SDOperand Op,
return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0);
}
-/// HandleMemIntrinsic - This handles memcpy/memset/memmove with invalid
-/// operands. This promotes or expands the operands as required.
-SDOperand DAGTypeLegalizer::HandleMemIntrinsic(SDNode *N) {
- // The chain and pointer [operands #0 and #1] are always valid types.
- SDOperand Chain = N->getOperand(0);
- SDOperand Ptr = N->getOperand(1);
- SDOperand Op2 = N->getOperand(2);
-
- // Op #2 is either a value (memset) or a pointer. Promote it if required.
- switch (getTypeAction(Op2.getValueType())) {
- default: assert(0 && "Unknown action for pointer/value operand");
- case Legal: break;
- case Promote: Op2 = GetPromotedOp(Op2); break;
- }
-
- // The length could have any action required.
- SDOperand Length = N->getOperand(3);
- switch (getTypeAction(Length.getValueType())) {
- default: assert(0 && "Unknown action for memop operand");
- case Legal: break;
- case Promote: Length = GetPromotedZExtOp(Length); break;
- case Expand:
- SDOperand Dummy; // discard the high part.
- GetExpandedOp(Length, Length, Dummy);
- break;
- }
-
- SDOperand Align = N->getOperand(4);
- switch (getTypeAction(Align.getValueType())) {
- default: assert(0 && "Unknown action for memop operand");
- case Legal: break;
- case Promote: Align = GetPromotedZExtOp(Align); break;
- }
-
- SDOperand AlwaysInline = N->getOperand(5);
- switch (getTypeAction(AlwaysInline.getValueType())) {
- default: assert(0 && "Unknown action for memop operand");
- case Legal: break;
- case Promote: AlwaysInline = GetPromotedZExtOp(AlwaysInline); break;
- }
-
- SDOperand Ops[] = { Chain, Ptr, Op2, Length, Align, AlwaysInline };
- return DAG.UpdateNodeOperands(SDOperand(N, 0), Ops, 6);
-}
-
/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
SDOperand DAGTypeLegalizer::JoinIntegers(SDOperand Lo, SDOperand Hi) {
MVT::ValueType LVT = Lo.getValueType();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 7d245ab..5b98793 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -165,7 +165,6 @@ private:
// Common routines.
SDOperand BitConvertToInteger(SDOperand Op);
SDOperand CreateStackStoreLoad(SDOperand Op, MVT::ValueType DestVT);
- SDOperand HandleMemIntrinsic(SDNode *N);
SDOperand JoinIntegers(SDOperand Lo, SDOperand Hi);
void SplitInteger(SDOperand Op, SDOperand &Lo, SDOperand &Hi);
void SplitInteger(SDOperand Op, MVT::ValueType LoVT, MVT::ValueType HiVT,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
index b872a44..fcde8f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
@@ -946,9 +946,6 @@ bool DAGTypeLegalizer::ExpandOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = ExpandOperand_STORE(cast<StoreSDNode>(N), OpNo);
break;
- case ISD::MEMSET:
- case ISD::MEMCPY:
- case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOperand_BUILD_VECTOR(N); break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
index b8118eb..93c8c60 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
@@ -447,9 +447,6 @@ bool DAGTypeLegalizer::PromoteOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE: Res = PromoteOperand_STORE(cast<StoreSDNode>(N),
OpNo); break;
- case ISD::MEMSET:
- case ISD::MEMCPY:
- case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
case ISD::BUILD_VECTOR: Res = PromoteOperand_BUILD_VECTOR(N); break;
case ISD::INSERT_VECTOR_ELT:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f096c70..327a8fe 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/Intrinsics.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -2385,28 +2386,357 @@ SDOperand SelectionDAG::getNode(unsigned Opcode, MVT::ValueType VT,
return getNode(Opcode, VT, Ops, 5);
}
-SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dest,
- SDOperand Src, SDOperand Size,
- SDOperand Align,
- SDOperand AlwaysInline) {
- SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
- return getNode(ISD::MEMCPY, MVT::Other, Ops, 6);
+/// getMemsetValue - Vectorized representation of the memset value
+/// operand.
+static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
+ SelectionDAG &DAG) {
+ MVT::ValueType CurVT = VT;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
+ uint64_t Val = C->getValue() & 255;
+ unsigned Shift = 8;
+ while (CurVT != MVT::i8) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+ }
+ return DAG.getConstant(Val, VT);
+ } else {
+ Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
+ unsigned Shift = 8;
+ while (CurVT != MVT::i8) {
+ Value =
+ DAG.getNode(ISD::OR, VT,
+ DAG.getNode(ISD::SHL, VT, Value,
+ DAG.getConstant(Shift, MVT::i8)), Value);
+ Shift <<= 1;
+ CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
+ }
+
+ return Value;
+ }
}
-SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dest,
- SDOperand Src, SDOperand Size,
- SDOperand Align,
- SDOperand AlwaysInline) {
- SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
- return getNode(ISD::MEMMOVE, MVT::Other, Ops, 6);
+/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
+/// used when a memcpy is turned into a memset when the source is a constant
+/// string ptr.
+static SDOperand getMemsetStringVal(MVT::ValueType VT,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ std::string &Str, unsigned Offset) {
+ uint64_t Val = 0;
+ unsigned MSB = MVT::getSizeInBits(VT) / 8;
+ if (TLI.isLittleEndian())
+ Offset = Offset + MSB - 1;
+ for (unsigned i = 0; i != MSB; ++i) {
+ Val = (Val << 8) | (unsigned char)Str[Offset];
+ Offset += TLI.isLittleEndian() ? -1 : 1;
+ }
+ return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns base and offset node for the
+static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
+ SelectionDAG &DAG) {
+ MVT::ValueType VT = Base.getValueType();
+ return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
}
-SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest,
+/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
+/// to replace the memset / memcpy is below the threshold. It also returns the
+/// types of the sequence of memory ops to perform memset / memcpy.
+static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned Align,
+ const TargetLowering &TLI) {
+ MVT::ValueType VT;
+
+ if (TLI.allowsUnalignedMemoryAccesses()) {
+ VT = MVT::i64;
+ } else {
+ switch (Align & 7) {
+ case 0:
+ VT = MVT::i64;
+ break;
+ case 4:
+ VT = MVT::i32;
+ break;
+ case 2:
+ VT = MVT::i16;
+ break;
+ default:
+ VT = MVT::i8;
+ break;
+ }
+ }
+
+ MVT::ValueType LVT = MVT::i64;
+ while (!TLI.isTypeLegal(LVT))
+ LVT = (MVT::ValueType)((unsigned)LVT - 1);
+ assert(MVT::isInteger(LVT));
+
+ if (VT > LVT)
+ VT = LVT;
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ while (VTSize > Size) {
+ VT = (MVT::ValueType)((unsigned)VT - 1);
+ VTSize >>= 1;
+ }
+ assert(MVT::isInteger(VT));
+
+ if (++NumMemOps > Limit)
+ return false;
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
+
+static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
+ SDOperand Chain, SDOperand Dst,
+ SDOperand Src, uint64_t Size,
+ unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memcpy to a series of store ops if the size operand falls below
+ // a certain threshold.
+ std::vector<MVT::ValueType> MemOps;
+ uint64_t Limit = -1;
+ if (!AlwaysInline)
+ Limit = TLI.getMaxStoresPerMemcpy();
+ if (!MeetsMaxMemopRequirement(MemOps, Limit, Size, Align, TLI))
+ return SDOperand();
+
+ SmallVector<SDOperand, 8> OutChains;
+
+ unsigned NumMemOps = MemOps.size();
+ unsigned SrcDelta = 0;
+ GlobalAddressSDNode *G = NULL;
+ std::string Str;
+ bool CopyFromStr = false;
+
+ if (Src.getOpcode() == ISD::GlobalAddress)
+ G = cast<GlobalAddressSDNode>(Src);
+ else if (Src.getOpcode() == ISD::ADD &&
+ Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+ Src.getOperand(1).getOpcode() == ISD::Constant) {
+ G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+ SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getValue();
+ }
+ if (G) {
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+ if (GV && GV->isConstant()) {
+ Str = GV->getStringValue(false);
+ if (!Str.empty()) {
+ CopyFromStr = true;
+ SrcOff += SrcDelta;
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value, Store;
+
+ if (CopyFromStr) {
+ Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
+ Store =
+ DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstOff);
+ } else {
+ Value = DAG.getLoad(VT, Chain,
+ getMemBasePlusOffset(Src, SrcOff, DAG),
+ SrcSV, SrcOff, false, Align);
+ Store =
+ DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstOff, false, Align);
+ }
+ OutChains.push_back(Store);
+ SrcOff += VTSize;
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+static SDOperand getMemsetStores(SelectionDAG &DAG,
+ SDOperand Chain, SDOperand Dst,
+ SDOperand Src, uint64_t Size,
+ unsigned Align,
+ Value *DstSV, uint64_t DstOff) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Expand memset to a series of load/store ops if the size operand
+ // falls below a certain threshold.
+ std::vector<MVT::ValueType> MemOps;
+ if (!MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
+ Size, Align, TLI))
+ return SDOperand();
+
+ SmallVector<SDOperand, 8> OutChains;
+
+ unsigned NumMemOps = MemOps.size();
+ for (unsigned i = 0; i < NumMemOps; i++) {
+ MVT::ValueType VT = MemOps[i];
+ unsigned VTSize = MVT::getSizeInBits(VT) / 8;
+ SDOperand Value = getMemsetValue(Src, VT, DAG);
+ SDOperand Store = DAG.getStore(Chain, Value,
+ getMemBasePlusOffset(Dst, DstOff, DAG),
+ DstSV, DstOff);
+ OutChains.push_back(Store);
+ DstOff += VTSize;
+ }
+
+ return DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &OutChains[0], OutChains.size());
+}
+
+SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dst,
+ SDOperand Src, SDOperand Size,
+ unsigned Align, bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+
+ // Check to see if we should lower the memcpy to loads and stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memcpy with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDOperand Result =
+ getMemcpyLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
+ Align, false, DstSV, DstOff, SrcSV, SrcOff);
+ if (Result.Val)
+ return Result;
+ }
+
+ // Then check to see if we should lower the memcpy with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDOperand Result =
+ TLI.EmitTargetCodeForMemcpy(*this, Chain, Dst, Src, Size, Align,
+ AlwaysInline,
+ DstSV, DstOff, SrcSV, SrcOff);
+ if (Result.Val)
+ return Result;
+
+ // If we really need inline code and the target declined to provide it,
+ // use a (potentially long) sequence of loads and stores.
+ if (AlwaysInline) {
+ assert(ConstantSize && "AlwaysInline requires a constant size!");
+ return getMemcpyLoadsAndStores(*this, Chain, Dst, Src,
+ ConstantSize->getValue(), Align, true,
+ DstSV, DstOff, SrcSV, SrcOff);
+ }
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, CallingConv::C, false,
+ getExternalSymbol("memcpy", TLI.getPointerTy()),
+ Args, *this);
+ return CallResult.second;
+}
+
+SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
+ SDOperand Src, SDOperand Size,
+ unsigned Align,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff) {
+
+ // TODO: Optimize small memmove cases with simple loads and stores,
+ // ensuring that all loads precede all stores. This can cause severe
+ // register pressure, so targets should be careful with the size limit.
+
+ // Then check to see if we should lower the memmove with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDOperand Result =
+ TLI.EmitTargetCodeForMemmove(*this, Chain, Dst, Src, Size, Align,
+ DstSV, DstOff, SrcSV, SrcOff);
+ if (Result.Val)
+ return Result;
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = TLI.getTargetData()->getIntPtrType();
+ Entry.Node = Dst; Args.push_back(Entry);
+ Entry.Node = Src; Args.push_back(Entry);
+ Entry.Node = Size; Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, CallingConv::C, false,
+ getExternalSymbol("memmove", TLI.getPointerTy()),
+ Args, *this);
+ return CallResult.second;
+}
+
+SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dst,
SDOperand Src, SDOperand Size,
- SDOperand Align,
- SDOperand AlwaysInline) {
- SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
- return getNode(ISD::MEMSET, MVT::Other, Ops, 6);
+ unsigned Align,
+ Value *DstSV, uint64_t DstOff) {
+
+ // Check to see if we should lower the memset to stores first.
+ // For cases within the target-specified limits, this is the best choice.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ // Memset with size zero? Just return the original chain.
+ if (ConstantSize->isNullValue())
+ return Chain;
+
+ SDOperand Result =
+ getMemsetStores(*this, Chain, Dst, Src, ConstantSize->getValue(), Align,
+ DstSV, DstOff);
+ if (Result.Val)
+ return Result;
+ }
+
+ // Then check to see if we should lower the memset with target-specific
+ // code. If the target chooses to do this, this is the next best.
+ SDOperand Result =
+ TLI.EmitTargetCodeForMemset(*this, Chain, Dst, Src, Size, Align,
+ DstSV, DstOff);
+ if (Result.Val)
+ return Result;
+
+ // Emit a library call.
+ const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ // Extend or truncate the argument to be an i32 value for the call.
+ if (Src.getValueType() > MVT::i32)
+ Src = getNode(ISD::TRUNCATE, MVT::i32, Src);
+ else
+ Src = getNode(ISD::ZERO_EXTEND, MVT::i32, Src);
+ Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
+ Args.push_back(Entry);
+ Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
+ Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ TLI.LowerCallTo(Chain, Type::VoidTy,
+ false, false, false, CallingConv::C, false,
+ getExternalSymbol("memset", TLI.getPointerTy()),
+ Args, *this);
+ return CallResult.second;
}
SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
@@ -4009,11 +4339,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STACKRESTORE: return "stackrestore";
case ISD::TRAP: return "trap";
- // Block memory operations.
- case ISD::MEMSET: return "memset";
- case ISD::MEMCPY: return "memcpy";
- case ISD::MEMMOVE: return "memmove";
-
// Bit manipulation
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index cfef9ac..ac5cfd2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -647,8 +647,6 @@ public:
void visitVAEnd(CallInst &I);
void visitVACopy(CallInst &I);
- void visitMemIntrinsic(CallInst &I, unsigned Op);
-
void visitGetResult(GetResultInst &I);
void visitUserOp1(Instruction &I) {
@@ -2737,18 +2735,48 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return "_longjmp"+!TLI.usesUnderscoreLongJmp();
break;
case Intrinsic::memcpy_i32:
- case Intrinsic::memcpy_i64:
- visitMemIntrinsic(I, ISD::MEMCPY);
+ case Intrinsic::memcpy_i64: {
+ SDOperand Op1 = getValue(I.getOperand(1));
+ SDOperand Op2 = getValue(I.getOperand(2));
+ SDOperand Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
return 0;
+ }
case Intrinsic::memset_i32:
- case Intrinsic::memset_i64:
- visitMemIntrinsic(I, ISD::MEMSET);
+ case Intrinsic::memset_i64: {
+ SDOperand Op1 = getValue(I.getOperand(1));
+ SDOperand Op2 = getValue(I.getOperand(2));
+ SDOperand Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+ DAG.setRoot(DAG.getMemset(getRoot(), Op1, Op2, Op3, Align,
+ I.getOperand(1), 0));
return 0;
+ }
case Intrinsic::memmove_i32:
- case Intrinsic::memmove_i64:
- visitMemIntrinsic(I, ISD::MEMMOVE);
+ case Intrinsic::memmove_i64: {
+ SDOperand Op1 = getValue(I.getOperand(1));
+ SDOperand Op2 = getValue(I.getOperand(2));
+ SDOperand Op3 = getValue(I.getOperand(3));
+ unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
+
+ // If the source and destination are known to not be aliases, we can
+ // lower memmove as memcpy.
+ uint64_t Size = -1ULL;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
+ Size = C->getValue();
+ if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+ AliasAnalysis::NoAlias) {
+ DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align, false,
+ I.getOperand(1), 0, I.getOperand(2), 0));
+ return 0;
+ }
+
+ DAG.setRoot(DAG.getMemmove(getRoot(), Op1, Op2, Op3, Align,
+ I.getOperand(1), 0, I.getOperand(2), 0));
return 0;
-
+ }
case Intrinsic::dbg_stoppoint: {
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
@@ -4342,242 +4370,6 @@ SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
return SDOperand();
}
-/// getMemsetValue - Vectorized representation of the memset value
-/// operand.
-static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
- SelectionDAG &DAG) {
- MVT::ValueType CurVT = VT;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- uint64_t Val = C->getValue() & 255;
- unsigned Shift = 8;
- while (CurVT != MVT::i8) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
- }
- return DAG.getConstant(Val, VT);
- } else {
- Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
- unsigned Shift = 8;
- while (CurVT != MVT::i8) {
- Value =
- DAG.getNode(ISD::OR, VT,
- DAG.getNode(ISD::SHL, VT, Value,
- DAG.getConstant(Shift, MVT::i8)), Value);
- Shift <<= 1;
- CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
- }
-
- return Value;
- }
-}
-
-/// getMemsetStringVal - Similar to getMemsetValue. Except this is only
-/// used when a memcpy is turned into a memset when the source is a constant
-/// string ptr.
-static SDOperand getMemsetStringVal(MVT::ValueType VT,
- SelectionDAG &DAG, TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
- uint64_t Val = 0;
- unsigned MSB = MVT::getSizeInBits(VT) / 8;
- if (TLI.isLittleEndian())
- Offset = Offset + MSB - 1;
- for (unsigned i = 0; i != MSB; ++i) {
- Val = (Val << 8) | (unsigned char)Str[Offset];
- Offset += TLI.isLittleEndian() ? -1 : 1;
- }
- return DAG.getConstant(Val, VT);
-}
-
-/// getMemBasePlusOffset - Returns base and offset node for the
-static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
- SelectionDAG &DAG, TargetLowering &TLI) {
- MVT::ValueType VT = Base.getValueType();
- return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
-}
-
-/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
-/// to replace the memset / memcpy is below the threshold. It also returns the
-/// types of the sequence of memory ops to perform memset / memcpy.
-static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned Align, TargetLowering &TLI) {
- MVT::ValueType VT;
-
- if (TLI.allowsUnalignedMemoryAccesses()) {
- VT = MVT::i64;
- } else {
- switch (Align & 7) {
- case 0:
- VT = MVT::i64;
- break;
- case 4:
- VT = MVT::i32;
- break;
- case 2:
- VT = MVT::i16;
- break;
- default:
- VT = MVT::i8;
- break;
- }
- }
-
- MVT::ValueType LVT = MVT::i64;
- while (!TLI.isTypeLegal(LVT))
- LVT = (MVT::ValueType)((unsigned)LVT - 1);
- assert(MVT::isInteger(LVT));
-
- if (VT > LVT)
- VT = LVT;
-
- unsigned NumMemOps = 0;
- while (Size != 0) {
- unsigned VTSize = MVT::getSizeInBits(VT) / 8;
- while (VTSize > Size) {
- VT = (MVT::ValueType)((unsigned)VT - 1);
- VTSize >>= 1;
- }
- assert(MVT::isInteger(VT));
-
- if (++NumMemOps > Limit)
- return false;
- MemOps.push_back(VT);
- Size -= VTSize;
- }
-
- return true;
-}
-
-void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) {
- SDOperand Op1 = getValue(I.getOperand(1));
- SDOperand Op2 = getValue(I.getOperand(2));
- SDOperand Op3 = getValue(I.getOperand(3));
- SDOperand Op4 = getValue(I.getOperand(4));
- unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
- if (Align == 0) Align = 1;
-
- // If the source and destination are known to not be aliases, we can
- // lower memmove as memcpy.
- if (Op == ISD::MEMMOVE) {
- uint64_t Size = -1ULL;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
- Size = C->getValue();
- if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
- AliasAnalysis::NoAlias)
- Op = ISD::MEMCPY;
- }
-
- if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
- std::vector<MVT::ValueType> MemOps;
-
- // Expand memset / memcpy to a series of load / store ops
- // if the size operand falls below a certain threshold.
- SmallVector<SDOperand, 8> OutChains;
- switch (Op) {
- default: break; // Do nothing for now.
- case ISD::MEMSET: {
- if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
- Size->getValue(), Align, TLI)) {
- unsigned NumMemOps = MemOps.size();
- unsigned Offset = 0;
- for (unsigned i = 0; i < NumMemOps; i++) {
- MVT::ValueType VT = MemOps[i];
- unsigned VTSize = MVT::getSizeInBits(VT) / 8;
- SDOperand Value = getMemsetValue(Op2, VT, DAG);
- SDOperand Store = DAG.getStore(getRoot(), Value,
- getMemBasePlusOffset(Op1, Offset, DAG, TLI),
- I.getOperand(1), Offset);
- OutChains.push_back(Store);
- Offset += VTSize;
- }
- }
- break;
- }
- case ISD::MEMCPY: {
- if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(),
- Size->getValue(), Align, TLI)) {
- unsigned NumMemOps = MemOps.size();
- unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;
- GlobalAddressSDNode *G = NULL;
- std::string Str;
- bool CopyFromStr = false;
-
- if (Op2.getOpcode() == ISD::GlobalAddress)
- G = cast<GlobalAddressSDNode>(Op2);
- else if (Op2.getOpcode() == ISD::ADD &&
- Op2.getOperand(0).getOpcode() == ISD::GlobalAddress &&
- Op2.getOperand(1).getOpcode() == ISD::Constant) {
- G = cast<GlobalAddressSDNode>(Op2.getOperand(0));
- SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue();
- }
- if (G) {
- GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
- if (GV && GV->isConstant()) {
- Str = GV->getStringValue(false);
- if (!Str.empty()) {
- CopyFromStr = true;
- SrcOff += SrcDelta;
- }
- }
- }
-
- for (unsigned i = 0; i < NumMemOps; i++) {
- MVT::ValueType VT = MemOps[i];
- unsigned VTSize = MVT::getSizeInBits(VT) / 8;
- SDOperand Value, Chain, Store;
-
- if (CopyFromStr) {
- Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
- Chain = getRoot();
- Store =
- DAG.getStore(Chain, Value,
- getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
- I.getOperand(1), DstOff);
- } else {
- Value = DAG.getLoad(VT, getRoot(),
- getMemBasePlusOffset(Op2, SrcOff, DAG, TLI),
- I.getOperand(2), SrcOff, false, Align);
- Chain = Value.getValue(1);
- Store =
- DAG.getStore(Chain, Value,
- getMemBasePlusOffset(Op1, DstOff, DAG, TLI),
- I.getOperand(1), DstOff, false, Align);
- }
- OutChains.push_back(Store);
- SrcOff += VTSize;
- DstOff += VTSize;
- }
- }
- break;
- }
- }
-
- if (!OutChains.empty()) {
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
- &OutChains[0], OutChains.size()));
- return;
- }
- }
-
- SDOperand AlwaysInline = DAG.getConstant(0, MVT::i1);
- SDOperand Node;
- switch(Op) {
- default:
- assert(0 && "Unknown Op");
- case ISD::MEMCPY:
- Node = DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
- break;
- case ISD::MEMMOVE:
- Node = DAG.getMemmove(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
- break;
- case ISD::MEMSET:
- Node = DAG.getMemset(getRoot(), Op1, Op2, Op3, Op4, AlwaysInline);
- break;
- }
- DAG.setRoot(Node);
-}
-
//===----------------------------------------------------------------------===//
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a0894dd..f69f046 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -17,7 +17,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CallingConv.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/StringExtras.h"
@@ -234,59 +234,6 @@ TargetLowering::TargetLowering(TargetMachine &tm)
TargetLowering::~TargetLowering() {}
-
-SDOperand TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
- assert(getSubtarget() && "Subtarget not defined");
- SDOperand ChainOp = Op.getOperand(0);
- SDOperand DestOp = Op.getOperand(1);
- SDOperand SourceOp = Op.getOperand(2);
- SDOperand CountOp = Op.getOperand(3);
- SDOperand AlignOp = Op.getOperand(4);
- SDOperand AlwaysInlineOp = Op.getOperand(5);
-
- bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)->getValue();
- unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
- if (Align == 0) Align = 1;
-
- // If size is unknown, call memcpy.
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
- if (!I) {
- assert(!AlwaysInline && "Cannot inline copy of unknown size");
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
- }
-
- // If not DWORD aligned or if size is more than threshold, then call memcpy.
- // The libc version is likely to be faster for the following cases. It can
- // use the address value and run time information about the CPU.
- // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
- unsigned Size = I->getValue();
- if (AlwaysInline ||
- (Size <= getSubtarget()->getMaxInlineSizeThreshold() &&
- (Align & 3) == 0))
- return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-}
-
-
-SDOperand TargetLowering::LowerMEMCPYCall(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- SDOperand Count,
- SelectionDAG &DAG) {
- MVT::ValueType IntPtr = getPointerTy();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Ty = getTargetData()->getIntPtrType();
- Entry.Node = Dest; Args.push_back(Entry);
- Entry.Node = Source; Args.push_back(Entry);
- Entry.Node = Count; Args.push_back(Entry);
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
- false, DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
- return CallResult.second;
-}
-
-
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 7218560..0095352 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -197,11 +197,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
- // Expand mem operations genericly.
- setOperationAction(ISD::MEMSET , MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
- setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
-
// Use the default implementation.
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAARG , MVT::Other, Expand);
@@ -1246,18 +1241,30 @@ static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
-SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- unsigned Size,
- unsigned Align,
- SelectionDAG &DAG) {
+SDOperand
+ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff){
// Do repeated 4-byte loads and stores. To be improved.
- assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!");
- unsigned BytesLeft = Size & 3;
- unsigned NumMemOps = Size >> 2;
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDOperand();
+ // This requires the copy size to be a constant, preferrably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDOperand();
+ uint64_t SizeVal = ConstantSize->getValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDOperand();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
unsigned EmittedNumMemOps = 0;
- unsigned SrcOff = 0, DstOff = 0;
MVT::ValueType VT = MVT::i32;
unsigned VTSize = 4;
unsigned i = 0;
@@ -1272,9 +1279,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
Loads[i] = DAG.getLoad(VT, Chain,
- DAG.getNode(ISD::ADD, MVT::i32, Source,
+ DAG.getNode(ISD::ADD, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- NULL, 0);
+ SrcSV, SrcOff);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -1283,9 +1290,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
TFOps[i] = DAG.getStore(Chain, Loads[i],
- DAG.getNode(ISD::ADD, MVT::i32, Dest,
+ DAG.getNode(ISD::ADD, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- NULL, 0);
+ DstSV, DstOff);
DstOff += VTSize;
}
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i);
@@ -1309,9 +1316,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
}
Loads[i] = DAG.getLoad(VT, Chain,
- DAG.getNode(ISD::ADD, MVT::i32, Source,
+ DAG.getNode(ISD::ADD, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- NULL, 0);
+ SrcSV, SrcOff);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
@@ -1331,9 +1338,9 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
}
TFOps[i] = DAG.getStore(Chain, Loads[i],
- DAG.getNode(ISD::ADD, MVT::i32, Dest,
+ DAG.getNode(ISD::ADD, MVT::i32, Dst,
DAG.getConstant(DstOff, MVT::i32)),
- NULL, 0);
+ DstSV, DstOff);
++i;
DstOff += VTSize;
BytesLeft -= VTSize;
@@ -1409,7 +1416,6 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: break;
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
- case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 285a20d..58d8d8c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -119,8 +119,8 @@ namespace llvm {
getRegClassForInlineAsmConstraint(const std::string &Constraint,
MVT::ValueType VT) const;
- virtual const TargetSubtarget* getSubtarget() {
- return static_cast<const TargetSubtarget*>(Subtarget);
+ virtual const ARMSubtarget* getSubtarget() {
+ return Subtarget;
}
private:
@@ -143,11 +143,14 @@ namespace llvm {
SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
- SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest,
- SDOperand Source, unsigned Size,
- unsigned Align, SelectionDAG &DAG);
-
+ SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff);
};
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index c43924b..fbc9e57 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -62,6 +62,8 @@ protected:
///
ARMSubtarget(const Module &M, const std::string &FS, bool thumb);
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const {
// FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
// Change this once Thumb ldmia / stmia support is added.
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index d208f59..91b1180 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -87,10 +87,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
setOperationAction(ISD::SDIV , MVT::i64, Custom);
setOperationAction(ISD::UDIV , MVT::i64, Custom);
- setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
- setOperationAction(ISD::MEMSET , MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
-
// We don't support sin/cos/sqrt/pow
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 2922609..1cb6918 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -175,9 +175,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
// SPU has no intrinsics for these particular operations:
- setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
- setOperationAction(ISD::MEMSET, MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
// PowerPC has no SREM/UREM instructions
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
index 2ec08b6..c53f3b4 100644
--- a/lib/Target/IA64/IA64ISelLowering.cpp
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -65,9 +65,6 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
setOperationAction(ISD::UREM , MVT::f32 , Expand);
setOperationAction(ISD::UREM , MVT::f64 , Expand);
- setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
- setOperationAction(ISD::MEMSET , MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 5c2e1c0..5ea9cdd 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -80,9 +80,6 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// Mips not supported intrinsics.
- setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
- setOperationAction(ISD::MEMSET, MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index ddc8e1a..e42e9dc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -78,9 +78,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
// PowerPC has no intrinsics for these particular operations
- setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
- setOperationAction(ISD::MEMSET, MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
// PowerPC has no SREM/UREM instructions
@@ -1735,10 +1732,9 @@ static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
unsigned Size) {
- SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
- SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
- SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32);
- return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
+ SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false,
+ NULL, 0, NULL, 0);
}
SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 1d4fe0b..3d5ad0b 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -570,9 +570,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// SPARC has no intrinsics for these particular operations.
- setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
- setOperationAction(ISD::MEMSET, MVT::Other, Expand);
- setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 66384f9..9db0288 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -206,7 +206,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
- setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
@@ -281,9 +280,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- // X86 wants to expand memset / memcpy itself.
- setOperationAction(ISD::MEMSET , MVT::Other, Custom);
- setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
@@ -1113,10 +1109,10 @@ CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain,
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
- SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
- SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
- return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*AlwaysInline=*/true,
+ NULL, 0, NULL, 0);
}
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
@@ -4557,52 +4553,51 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}
-SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
- SDOperand InFlag(0, 0);
- SDOperand Chain = Op.getOperand(0);
- unsigned Align =
- (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
- if (Align == 0) Align = 1;
-
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned or size is more than the threshold, call memset.
- // The libc version is likely to be faster for these cases. It can use the
- // address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
+SDOperand
+X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ Value *DstSV, uint64_t DstOff) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ /// If not DWORD aligned or size is more than the threshold, call the library.
+ /// The libc version is likely to be faster for these cases. It can use the
+ /// address value and run time information about the CPU.
+ if ((Align & 3) == 0 ||
+ !ConstantSize ||
+ ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
+ SDOperand InFlag(0, 0);
// Check to see if there is a specialized entry-point for memory zeroing.
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Op.getOperand(2));
- const char *bzeroEntry =
- V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0;
-
- MVT::ValueType IntPtr = getPointerTy();
- const Type *IntPtrTy = getTargetData()->getIntPtrType();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Op.getOperand(1);
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
-
- if (!bzeroEntry) {
- // Extend the unsigned i8 argument to be an int value for the call.
- Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+ if (const char *bzeroEntry =
+ V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
+ false, DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ Args, DAG);
+ return CallResult.second;
}
- Entry.Node = Op.getOperand(3);
- Args.push_back(Entry);
- const char *Name = bzeroEntry ? bzeroEntry : "memset";
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
- false, DAG.getExternalSymbol(Name, IntPtr), Args, DAG);
- return CallResult.second;
+ // Otherwise have the target-independent code call memset.
+ return SDOperand();
}
+ uint64_t SizeVal = ConstantSize->getValue();
+ SDOperand InFlag(0, 0);
MVT::ValueType AVT;
SDOperand Count;
- ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
bool TwoRepStos = false;
if (ValC) {
@@ -4630,22 +4625,14 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
default: // Byte aligned
AVT = MVT::i8;
ValReg = X86::AL;
- Count = Op.getOperand(3);
+ Count = Size;
break;
}
if (AVT > MVT::i8) {
- if (I) {
- unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
- BytesLeft = I->getValue() % UBytes;
- } else {
- assert(AVT >= MVT::i32 &&
- "Do not use rep;stos if not at least DWORD aligned");
- Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
- Op.getOperand(3), DAG.getConstant(2, MVT::i8));
- TwoRepStos = true;
- }
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
}
Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
@@ -4653,8 +4640,8 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
- Count = Op.getOperand(3);
- Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
+ Count = Size;
+ Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag);
InFlag = Chain.getValue(1);
}
@@ -4662,7 +4649,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
- Op.getOperand(1), InFlag);
+ Dst, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
@@ -4674,7 +4661,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
if (TwoRepStos) {
InFlag = Chain.getValue(1);
- Count = Op.getOperand(3);
+ Count = Size;
MVT::ValueType CVT = Count.getValueType();
SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
@@ -4688,79 +4675,68 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
} else if (BytesLeft) {
- // Issue stores for the last 1 - 7 bytes.
- SDOperand Value;
- unsigned Val = ValC->getValue() & 255;
- unsigned Offset = I->getValue() - BytesLeft;
- SDOperand DstAddr = Op.getOperand(1);
- MVT::ValueType AddrVT = DstAddr.getValueType();
- if (BytesLeft >= 4) {
- Val = (Val << 8) | Val;
- Val = (Val << 16) | Val;
- Value = DAG.getConstant(Val, MVT::i32);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- BytesLeft -= 4;
- Offset += 4;
- }
- if (BytesLeft >= 2) {
- Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- BytesLeft -= 2;
- Offset += 2;
- }
- if (BytesLeft == 1) {
- Value = DAG.getConstant(Val, MVT::i8);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- }
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT::ValueType AddrVT = Dst.getValueType();
+ MVT::ValueType SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain,
+ DAG.getNode(ISD::ADD, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, DstSV, Offset);
}
+ // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
return Chain;
}
-SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- unsigned Size,
- unsigned Align,
- SelectionDAG &DAG) {
+SDOperand
+X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff){
+
+ // This requires the copy size to be a constant, preferrably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDOperand();
+ uint64_t SizeVal = ConstantSize->getValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDOperand();
+
+ SmallVector<SDOperand, 4> Results;
+
MVT::ValueType AVT;
unsigned BytesLeft = 0;
- switch (Align & 3) {
- case 2: // WORD aligned
- AVT = MVT::i16;
- break;
- case 0: // DWORD aligned
- AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
- AVT = MVT::i64;
- break;
- default: // Byte aligned
- AVT = MVT::i8;
- break;
- }
+ if (Align >= 8 && Subtarget->is64Bit())
+ AVT = MVT::i64;
+ else if (Align >= 4)
+ AVT = MVT::i32;
+ else if (Align >= 2)
+ AVT = MVT::i16;
+ else
+ AVT = MVT::i8;
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
- BytesLeft = Size % UBytes;
+ unsigned CountVal = SizeVal / UBytes;
+ SDOperand Count = DAG.getIntPtrConstant(CountVal);
+ BytesLeft = SizeVal % UBytes;
SDOperand InFlag(0, 0);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
- Dest, InFlag);
+ Dst, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
- Source, InFlag);
+ Src, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
@@ -4768,57 +4744,28 @@ SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
Ops.push_back(Chain);
Ops.push_back(DAG.getValueType(AVT));
Ops.push_back(InFlag);
- Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
+ Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()));
if (BytesLeft) {
- // Issue loads and stores for the last 1 - 7 bytes.
- unsigned Offset = Size - BytesLeft;
- SDOperand DstAddr = Dest;
- MVT::ValueType DstVT = DstAddr.getValueType();
- SDOperand SrcAddr = Source;
- MVT::ValueType SrcVT = SrcAddr.getValueType();
- SDOperand Value;
- if (BytesLeft >= 4) {
- Value = DAG.getLoad(MVT::i32, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- BytesLeft -= 4;
- Offset += 4;
- }
- if (BytesLeft >= 2) {
- Value = DAG.getLoad(MVT::i16, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- BytesLeft -= 2;
- Offset += 2;
- }
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT::ValueType DstVT = Dst.getValueType();
+ MVT::ValueType SrcVT = Src.getValueType();
+ MVT::ValueType SizeVT = Size.getValueType();
- if (BytesLeft == 1) {
- Value = DAG.getLoad(MVT::i8, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- }
+ Results.push_back(DAG.getMemcpy(Chain,
+ DAG.getNode(ISD::ADD, DstVT, Dst,
+ DAG.getConstant(Offset,
+ DstVT)),
+ DAG.getNode(ISD::ADD, SrcVT, Src,
+ DAG.getConstant(Offset,
+ SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, AlwaysInline,
+ DstSV, Offset, SrcSV, Offset));
}
- return Chain;
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size());
}
/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
@@ -5430,8 +5377,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::CALL: return LowerCALL(Op, DAG);
case ISD::RET: return LowerRET(Op, DAG);
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
- case ISD::MEMSET: return LowerMEMSET(Op, DAG);
- case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d809950..2abe237 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -441,8 +441,8 @@ namespace llvm {
SDOperand Ret,
SelectionDAG &DAG) const;
- virtual const TargetSubtarget* getSubtarget() {
- return static_cast<const TargetSubtarget*>(Subtarget);
+ virtual const X86Subtarget* getSubtarget() {
+ return Subtarget;
}
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
@@ -512,9 +512,6 @@ namespace llvm {
SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
- SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source,
- SDOperand Chain, unsigned Size, unsigned Align,
- SelectionDAG &DAG);
SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG);
@@ -535,6 +532,19 @@ namespace llvm {
SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG);
SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG);
SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG);
+
+ SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ Value *DstSV, uint64_t DstOff);
+ SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ Value *DstSV, uint64_t DstOff,
+ Value *SrcSV, uint64_t SrcOff);
};
}
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.llx b/test/CodeGen/X86/2004-02-12-Memcpy.llx
index 151c5a5..59364c1 100644
--- a/test/CodeGen/X86/2004-02-12-Memcpy.llx
+++ b/test/CodeGen/X86/2004-02-12-Memcpy.llx
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
-; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep memcpy | count 2
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3
@A = global [32 x i32] zeroinitializer
@B = global [32 x i32] zeroinitializer
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index f438160..f85c8ff 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -1,7 +1,9 @@
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
-%struct.s = type { i64, i64, i64 }
+%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
+ i64, i64, i64, i64, i64, i64, i64, i64,
+ i64 }
define void @g(i64 %a, i64 %b, i64 %c) {
entry:
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index b3794ec..074bab4 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -1,7 +1,11 @@
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
-%struct.s = type { i32, i32, i32, i32, i32, i32 }
+%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32, i32, i32, i32, i32, i32,
+ i32 }
define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) {
entry:
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 591749f..d2fa9e2 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -1,7 +1,15 @@
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
-%struct.s = type { i16, i16, i16, i16, i16, i16 }
+%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
+ i16, i16, i16, i16, i16, i16, i16, i16,
+ i16, i16, i16, i16, i16, i16, i16, i16,
+ i16, i16, i16, i16, i16, i16, i16, i16,
+ i16, i16, i16, i16, i16, i16, i16, i16,
+ i16, i16, i16, i16, i16, i16, i16, i16,
+ i16, i16, i16, i16, i16, i16, i16, i16,
+ i16, i16, i16, i16, i16, i16, i16, i16,
+ i16 }
define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index 4965d16..fd9c197 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -1,7 +1,23 @@
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
-%struct.s = type { i8, i8, i8, i8, i8, i8 }
+%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8, i8, i8, i8, i8, i8, i8, i8,
+ i8 }
define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3,
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 4199bf0..fcbc59b 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -1,6 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep add | grep 16
- %struct.S = type { <2 x i64> }
+ %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
+ <2 x i64> }
define i32 @main() nounwind {
entry:
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
new file mode 100644
index 0000000..dedd948
--- /dev/null
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llc | not grep movs
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind {
+entry:
+ %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
+ %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
+ %tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1]
+ %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1]
+ %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
+ %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
+ %tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
+ %tmp7 = load x86_fp80* %tmp6, align 16 ; <x86_fp80> [#uses=1]
+ store x86_fp80 %tmp3, x86_fp80* %real, align 16
+ store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16
+ call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %iz ) nounwind
+ ret void
+}
+
+declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
new file mode 100644
index 0000000..b0cdf49
--- /dev/null
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
+
+declare void @llvm.memset.i64(i8*, i8, i64, i32)
+
+define void @foo(i8* %p, i64 %n) {
+ call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
+ ret void
+}