aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Vectorize/BBVectorize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Vectorize/BBVectorize.cpp')
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp78
1 files changed, 39 insertions, 39 deletions
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index cbc1d63..c5e1dcb 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -533,7 +533,7 @@ namespace {
default: break;
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because scalar GEPs are usually
- // lowered to the intruction addressing mode. At the moment we don't
+ // lowered to the instruction addressing mode. At the moment we don't
// generate vector GEPs.
return 0;
case Instruction::Br:
@@ -625,10 +625,10 @@ namespace {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
- Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *VTy = IPtr->getType()->getPointerElementType();
int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
- Type *VTy2 = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
int64_t VTy2TSS = (int64_t) TD->getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
@@ -1182,6 +1182,8 @@ namespace {
// Look for an instruction with which to pair instruction *I...
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
bool JAfterStart = IAfterStart;
BasicBlock::iterator J = llvm::next(I);
for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
@@ -1403,6 +1405,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (BasicBlock::iterator J = llvm::next(I); J != E; ++J) {
(void) trackUsesOfI(Users, WriteSet, I, J);
@@ -2227,11 +2231,12 @@ namespace {
// The pointer value is taken to be the one with the lowest offset.
Value *VPtr = IPtr;
- Type *ArgTypeI = cast<PointerType>(IPtr->getType())->getElementType();
- Type *ArgTypeJ = cast<PointerType>(JPtr->getType())->getElementType();
+ Type *ArgTypeI = IPtr->getType()->getPointerElementType();
+ Type *ArgTypeJ = JPtr->getType()->getPointerElementType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- Type *VArgPtrType = PointerType::get(VArgType,
- cast<PointerType>(IPtr->getType())->getAddressSpace());
+ Type *VArgPtrType
+ = PointerType::get(VArgType,
+ IPtr->getType()->getPointerAddressSpace());
return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
/* insert before */ I);
}
@@ -2240,7 +2245,7 @@ namespace {
unsigned MaskOffset, unsigned NumInElem,
unsigned NumInElem1, unsigned IdxOffset,
std::vector<Constant*> &Mask) {
- unsigned NumElem1 = cast<VectorType>(J->getType())->getNumElements();
+ unsigned NumElem1 = J->getType()->getVectorNumElements();
for (unsigned v = 0; v < NumElem1; ++v) {
int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
if (m < 0) {
@@ -2267,18 +2272,18 @@ namespace {
Type *ArgTypeJ = J->getType();
Type *VArgType = getVecTypeForPair(ArgTypeI, ArgTypeJ);
- unsigned NumElemI = cast<VectorType>(ArgTypeI)->getNumElements();
+ unsigned NumElemI = ArgTypeI->getVectorNumElements();
// Get the total number of elements in the fused vector type.
// By definition, this must equal the number of elements in
// the final mask.
- unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned NumElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(NumElem);
Type *OpTypeI = I->getOperand(0)->getType();
- unsigned NumInElemI = cast<VectorType>(OpTypeI)->getNumElements();
+ unsigned NumInElemI = OpTypeI->getVectorNumElements();
Type *OpTypeJ = J->getOperand(0)->getType();
- unsigned NumInElemJ = cast<VectorType>(OpTypeJ)->getNumElements();
+ unsigned NumInElemJ = OpTypeJ->getVectorNumElements();
// The fused vector will be:
// -----------------------------------------------------
@@ -2340,6 +2345,12 @@ namespace {
return ExpandedIEChain;
}
+ static unsigned getNumScalarElements(Type *Ty) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+ return VecTy->getNumElements();
+ return 1;
+ }
+
// Returns the value to be used as the specified operand of the vector
// instruction that fuses I with J.
Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
@@ -2355,17 +2366,8 @@ namespace {
Instruction *L = I, *H = J;
Type *ArgTypeL = ArgTypeI, *ArgTypeH = ArgTypeJ;
- unsigned numElemL;
- if (ArgTypeL->isVectorTy())
- numElemL = cast<VectorType>(ArgTypeL)->getNumElements();
- else
- numElemL = 1;
-
- unsigned numElemH;
- if (ArgTypeH->isVectorTy())
- numElemH = cast<VectorType>(ArgTypeH)->getNumElements();
- else
- numElemH = 1;
+ unsigned numElemL = getNumScalarElements(ArgTypeL);
+ unsigned numElemH = getNumScalarElements(ArgTypeH);
Value *LOp = L->getOperand(o);
Value *HOp = H->getOperand(o);
@@ -2426,11 +2428,12 @@ namespace {
if (CanUseInputs) {
unsigned LOpElem =
- cast<VectorType>(cast<Instruction>(LOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(LOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
+
unsigned HOpElem =
- cast<VectorType>(cast<Instruction>(HOp)->getOperand(0)->getType())
- ->getNumElements();
+ cast<Instruction>(HOp)->getOperand(0)->getType()
+ ->getVectorNumElements();
// We have one or two input vectors. We need to map each index of the
// operands to the index of the original vector.
@@ -2646,14 +2649,14 @@ namespace {
getReplacementName(IBeforeJ ? I : J,
true, o, 1));
}
-
+
NHOp->insertBefore(IBeforeJ ? J : I);
HOp = NHOp;
}
}
if (ArgType->isVectorTy()) {
- unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ unsigned numElem = VArgType->getVectorNumElements();
std::vector<Constant*> Mask(numElem);
for (unsigned v = 0; v < numElem; ++v) {
unsigned Idx = v;
@@ -2746,16 +2749,8 @@ namespace {
VectorType *VType = getVecTypeForPair(IType, JType);
unsigned numElem = VType->getNumElements();
- unsigned numElemI, numElemJ;
- if (IType->isVectorTy())
- numElemI = cast<VectorType>(IType)->getNumElements();
- else
- numElemI = 1;
-
- if (JType->isVectorTy())
- numElemJ = cast<VectorType>(JType)->getNumElements();
- else
- numElemJ = 1;
+ unsigned numElemI = getNumScalarElements(IType);
+ unsigned numElemJ = getNumScalarElements(JType);
if (IType->isVectorTy()) {
std::vector<Constant*> Mask1(numElemI), Mask2(numElemI);
@@ -2804,6 +2799,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J; ++L)
(void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs);
@@ -2824,6 +2821,8 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
+
for (; cast<Instruction>(L) != J;) {
if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) {
// Move this instruction
@@ -2853,6 +2852,7 @@ namespace {
DenseSet<Value *> Users;
AliasSetTracker WriteSet(*AA);
+ if (I->mayWriteToMemory()) WriteSet.add(I);
// Note: We cannot end the loop when we reach J because J could be moved
// farther down the use chain by another instruction pairing. Also, J