diff options
Diffstat (limited to 'lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r-- | lib/Transforms/InstCombine/InstCombineCalls.cpp | 155 |
1 files changed, 61 insertions, 94 deletions
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 21243c2..56b6cd3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -197,12 +197,51 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +static Value *SimplifyX86insertps(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + VectorType *VecTy = cast<VectorType>(II.getType()); + ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); + + // The immediate permute control byte looks like this: + // [3:0] - zero mask for each 32-bit lane + // [5:4] - select one 32-bit destination lane + // [7:6] - select one 32-bit source lane + + uint8_t Imm = CInt->getZExtValue(); + uint8_t ZMask = Imm & 0xf; + uint8_t DestLane = (Imm >> 4) & 0x3; + uint8_t SourceLane = (Imm >> 6) & 0x3; + + // If all zero mask bits are set, this was just a weird way to + // generate a zero vector. + if (ZMask == 0xf) + return ZeroVector; + + // TODO: Model this case as two shuffles or a 'logical and' plus shuffle? + if (ZMask) + return nullptr; + + assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); + + // If we're not zeroing anything, this is a single shuffle. + // Replace the selected destination lane with the selected source lane. + // For all other lanes, pass the first source bits through. + int ShuffleMask[4] = { 0, 1, 2, 3 }; + ShuffleMask[DestLane] = SourceLane + 4; + + return Builder.CreateShuffleVector(II.getArgOperand(0), II.getArgOperand(1), + ShuffleMask); + } + return nullptr; +} + /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit /// source vectors, unless a zero bit is set. If a zero bit is set, /// then ignore that half of the mask and clear that half of the vector. static Value *SimplifyX86vperm2(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { - if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { + if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) { VectorType *VecTy = cast<VectorType>(II.getType()); ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy); @@ -415,112 +454,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; - case Intrinsic::uadd_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWAdd(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateAdd(LHS, RHS), true); - } - // FALL THROUGH uadd into sadd + + case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - // Canonicalize constants into the RHS. + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: if (isa<Constant>(II->getArgOperand(0)) && !isa<Constant>(II->getArgOperand(1))) { + // Canonicalize constants into the RHS. Value *LHS = II->getArgOperand(0); II->setArgOperand(0, II->getArgOperand(1)); II->setArgOperand(1, LHS); return II; } + // fall through - // X + undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X + 0 -> {X, false} - if (RHS->isZero()) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - - // We can strength reduce reduce this signed add into a regular add if we - // can prove that it will never overflow. - if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedAdd(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false); - } - } - - break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - // undef - X -> undef - // X - undef -> undef - if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); - - if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) { - // X - 0 -> {X, false} - if (ConstRHS->isZero()) { - return CreateOverflowTuple(II, LHS, false, /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) { - if (WillNotOverflowSignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false); - } - } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false); - } - } - break; - } - case Intrinsic::umul_with_overflow: { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, II); - if (OR == OverflowResult::NeverOverflows) - return CreateOverflowTuple(II, Builder->CreateNUWMul(LHS, RHS), false); - if (OR == OverflowResult::AlwaysOverflows) - return CreateOverflowTuple(II, Builder->CreateMul(LHS, RHS), true); - } // FALL THROUGH - case Intrinsic::smul_with_overflow: - // Canonicalize constants into the RHS. - if (isa<Constant>(II->getArgOperand(0)) && - !isa<Constant>(II->getArgOperand(1))) { - Value *LHS = II->getArgOperand(0); - II->setArgOperand(0, II->getArgOperand(1)); - II->setArgOperand(1, LHS); - return II; - } - - // X * undef -> undef - if (isa<UndefValue>(II->getArgOperand(1))) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + OverflowCheckFlavor OCF = + IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID()); + assert(OCF != OCF_INVALID && "unexpected!"); - if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) { - // X*0 -> {0, false} - if (RHSI->isZero()) - return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); + Value *OperationResult = nullptr; + Constant *OverflowResult = nullptr; + if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1), + *II, OperationResult, OverflowResult)) + return CreateOverflowTuple(II, OperationResult, OverflowResult); - // X * 1 -> {X, false} - if (RHSI->equalsInt(1)) { - return CreateOverflowTuple(II, II->getArgOperand(0), false, - /*ReUseName*/false); - } - } - if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) { - Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); - if (WillNotOverflowSignedMul(LHS, RHS, *II)) { - return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false); - } - } break; + } + case Intrinsic::minnum: case Intrinsic::maxnum: { Value *Arg0 = II->getArgOperand(0); @@ -806,7 +769,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; } - + case Intrinsic::x86_sse41_insertps: + if (Value *V = SimplifyX86insertps(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::x86_sse4a_insertqi: { // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top // ones undef |