Diffstat (limited to 'lib')
566 files changed, 33520 insertions, 8199 deletions
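
One pattern runs through every hunk below: the constant folders and the InstructionSimplify entry points all gain a "const TargetLibraryInfo *" parameter, which is threaded through each recursive call, and folds of C library functions are now guarded by TLI->has(LibFunc::...). As a minimal caller-side sketch (the pass boilerplate and how TLI is obtained are assumptions, not shown in this diff):

// Sketch of invoking the post-patch API. Both TD and TLI may be null;
// when TLI is null, ConstantFoldCall refuses libcall folds (see the
// "if (!TLI) return 0;" hunk in ConstantFolding.cpp below).
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Instruction.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"

using namespace llvm;

static Constant *tryConstantFold(Instruction *I, const TargetData *TD,
                                 const TargetLibraryInfo *TLI) {
  // Same entry point as before the patch, plus the new TLI argument.
  return ConstantFoldInstruction(I, TD, TLI);
}
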
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e79459d..cb1e1eb 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -58,10 +58,4 @@ add_llvm_library(LLVMAnalysis
   ValueTracking.cpp
   )
 
-add_llvm_library_dependencies(LLVMAnalysis
-  LLVMCore
-  LLVMSupport
-  LLVMTarget
-  )
-
 add_subdirectory(IPA)
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index df79849..8e2f263 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Operator.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -542,8 +543,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
 /// explicitly cast them so that they aren't implicitly casted by the
 /// getelementptr.
 static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
-                                Type *ResultTy,
-                                const TargetData *TD) {
+                                Type *ResultTy, const TargetData *TD,
+                                const TargetLibraryInfo *TLI) {
   if (!TD) return 0;
   Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
@@ -568,7 +569,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
 
   Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
       C = Folded;
   return C;
 }
@@ -576,10 +577,11 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
 /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
 /// constant expression, do so.
 static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
-                                         Type *ResultTy,
-                                         const TargetData *TD) {
+                                         Type *ResultTy, const TargetData *TD,
+                                         const TargetLibraryInfo *TLI) {
   Constant *Ptr = Ops[0];
-  if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+  if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+      !Ptr->getType()->isPointerTy())
     return 0;
 
   Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
@@ -602,7 +604,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
         Res = ConstantExpr::getSub(Res, CE->getOperand(1));
       Res = ConstantExpr::getIntToPtr(Res, ResultTy);
       if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
-        Res = ConstantFoldConstantExpression(ResCE, TD);
+        Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
       return Res;
     }
   }
@@ -729,7 +731,9 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
 /// Note that this fails if not all of the operands are constant.  Otherwise,
 /// this function can only fail when attempting to fold instructions like loads
 /// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I,
+                                        const TargetData *TD,
+                                        const TargetLibraryInfo *TLI) {
   // Handle PHI nodes quickly here...
   if (PHINode *PN = dyn_cast<PHINode>(I)) {
     Constant *CommonValue = 0;
@@ -765,7 +769,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
 
   if (const CmpInst *CI = dyn_cast<CmpInst>(I))
     return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
-                                           TD);
+                                           TD, TLI);
 
   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
     return ConstantFoldLoadInst(LI, TD);
@@ -781,28 +785,29 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
                                     cast<Constant>(EVI->getAggregateOperand()),
                                     EVI->getIndices());
 
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
 }
 
 /// ConstantFoldConstantExpression - Attempt to fold the constant expression
 /// using the specified TargetData.  If successful, the constant result is
 /// returned, if not, null is returned.
 Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
-                                               const TargetData *TD) {
+                                               const TargetData *TD,
+                                               const TargetLibraryInfo *TLI) {
   SmallVector<Constant*, 8> Ops;
   for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
        i != e; ++i) {
     Constant *NewC = cast<Constant>(*i);
     // Recursively fold the ConstantExpr's operands.
     if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
-      NewC = ConstantFoldConstantExpression(NewCE, TD);
+      NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
     Ops.push_back(NewC);
   }
 
   if (CE->isCompare())
     return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
-                                           TD);
-  return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD);
+                                           TD, TLI);
+  return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
 }
 
 /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -817,7 +822,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
 ///
 Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
                                          ArrayRef<Constant *> Ops,
-                                         const TargetData *TD) {
+                                         const TargetData *TD,
+                                         const TargetLibraryInfo *TLI) {
   // Handle easy binops first.
   if (Instruction::isBinaryOp(Opcode)) {
     if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
@@ -834,7 +840,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
   case Instruction::Call:
     if (Function *F = dyn_cast<Function>(Ops.back()))
       if (canConstantFoldCallTo(F))
-        return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1));
+        return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
     return 0;
   case Instruction::PtrToInt:
     // If the input is an inttoptr, eliminate the pair.  This requires knowing
@@ -888,9 +894,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
   case Instruction::ShuffleVector:
     return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
   case Instruction::GetElementPtr:
-    if (Constant *C = CastGEPIndices(Ops, DestTy, TD))
+    if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
       return C;
-    if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD))
+    if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
       return C;
 
     return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
@@ -903,7 +909,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
 ///
 Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                 Constant *Ops0, Constant *Ops1,
-                                                const TargetData *TD) {
+                                                const TargetData *TD,
+                                                const TargetLibraryInfo *TLI) {
   // fold: icmp (inttoptr x), null         -> icmp x, 0
   // fold: icmp (ptrtoint x), 0            -> icmp x, null
   // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
@@ -920,7 +927,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
         Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
                                                    IntPtrTy, false);
         Constant *Null = Constant::getNullValue(C->getType());
-        return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+        return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
       }
 
       // Only do this transformation if the int is intptrty in size, otherwise
@@ -929,7 +936,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
           CE0->getType() == IntPtrTy) {
         Constant *C = CE0->getOperand(0);
         Constant *Null = Constant::getNullValue(C->getType());
-        return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+        return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
       }
     }
 
@@ -944,7 +951,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
                                                     IntPtrTy, false);
         Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
                                                     IntPtrTy, false);
-        return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+        return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
       }
 
       // Only do this transformation if the int is intptrty in size, otherwise
@@ -953,7 +960,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
           CE0->getType() == IntPtrTy &&
           CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
         return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
-                                               CE1->getOperand(0), TD);
+                                               CE1->getOperand(0), TD, TLI);
     }
   }
 
@@ -962,13 +969,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
     if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
         CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
       Constant *LHS =
-        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
+                                        TD, TLI);
       Constant *RHS =
-        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+        ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
+                                        TD, TLI);
       unsigned OpC =
         Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
       Constant *Ops[] = { LHS, RHS };
-      return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD);
+      return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
     }
   }
 
@@ -1045,6 +1054,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   switch (F->getIntrinsicID()) {
   case Intrinsic::sqrt:
+  case Intrinsic::pow:
   case Intrinsic::powi:
   case Intrinsic::bswap:
   case Intrinsic::ctpop:
@@ -1168,7 +1178,8 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
 /// ConstantFoldCall - Attempt to constant fold a call to the specified function
 /// with the specified arguments, returning null if unsuccessful.
 Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+                       const TargetLibraryInfo *TLI) {
   if (!F->hasName())
     return 0;
   StringRef Name = F->getName();
@@ -1183,6 +1194,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
         return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
       }
 
+      if (!TLI)
+        return 0;
       if (!Ty->isFloatTy() && !Ty->isDoubleTy())
         return 0;
 
@@ -1201,43 +1214,43 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
                                      Op->getValueAPF().convertToDouble();
       switch (Name[0]) {
       case 'a':
-        if (Name == "acos")
+        if (Name == "acos" && TLI->has(LibFunc::acos))
           return ConstantFoldFP(acos, V, Ty);
-        else if (Name == "asin")
+        else if (Name == "asin" && TLI->has(LibFunc::asin))
          return ConstantFoldFP(asin, V, Ty);
-        else if (Name == "atan")
+        else if (Name == "atan" && TLI->has(LibFunc::atan))
          return ConstantFoldFP(atan, V, Ty);
        break;
       case 'c':
-        if (Name == "ceil")
+        if (Name == "ceil" && TLI->has(LibFunc::ceil))
          return ConstantFoldFP(ceil, V, Ty);
-        else if (Name == "cos")
+        else if (Name == "cos" && TLI->has(LibFunc::cos))
          return ConstantFoldFP(cos, V, Ty);
-        else if (Name == "cosh")
+        else if (Name == "cosh" && TLI->has(LibFunc::cosh))
          return ConstantFoldFP(cosh, V, Ty);
-        else if (Name == "cosf")
+        else if (Name == "cosf" && TLI->has(LibFunc::cosf))
          return ConstantFoldFP(cos, V, Ty);
        break;
       case 'e':
-        if (Name == "exp")
+        if (Name == "exp" && TLI->has(LibFunc::exp))
          return ConstantFoldFP(exp, V, Ty);
-        if (Name == "exp2") {
+        if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
          // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
          // C99 library.
          return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
        }
        break;
       case 'f':
-        if (Name == "fabs")
+        if (Name == "fabs" && TLI->has(LibFunc::fabs))
          return ConstantFoldFP(fabs, V, Ty);
-        else if (Name == "floor")
+        else if (Name == "floor" && TLI->has(LibFunc::floor))
          return ConstantFoldFP(floor, V, Ty);
        break;
       case 'l':
-        if (Name == "log" && V > 0)
+        if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
          return ConstantFoldFP(log, V, Ty);
-        else if (Name == "log10" && V > 0)
+        else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
          return ConstantFoldFP(log10, V, Ty);
        else if (F->getIntrinsicID() == Intrinsic::sqrt &&
                 (Ty->isFloatTy() || Ty->isDoubleTy())) {
@@ -1248,21 +1261,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
        }
        break;
       case 's':
-        if (Name == "sin")
+        if (Name == "sin" && TLI->has(LibFunc::sin))
          return ConstantFoldFP(sin, V, Ty);
-        else if (Name == "sinh")
+        else if (Name == "sinh" && TLI->has(LibFunc::sinh))
          return ConstantFoldFP(sinh, V, Ty);
-        else if (Name == "sqrt" && V >= 0)
+        else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
          return ConstantFoldFP(sqrt, V, Ty);
-        else if (Name == "sqrtf" && V >= 0)
+        else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
          return ConstantFoldFP(sqrt, V, Ty);
-        else if (Name == "sinf")
+        else if (Name == "sinf" && TLI->has(LibFunc::sinf))
          return ConstantFoldFP(sin, V, Ty);
        break;
       case 't':
-        if (Name == "tan")
+        if (Name == "tan" && TLI->has(LibFunc::tan))
          return ConstantFoldFP(tan, V, Ty);
-        else if (Name == "tanh")
+        else if (Name == "tanh" && TLI->has(LibFunc::tanh))
          return ConstantFoldFP(tanh, V, Ty);
        break;
       default:
@@ -1277,10 +1290,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
        return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
       case Intrinsic::ctpop:
        return ConstantInt::get(Ty, Op->getValue().countPopulation());
-      case Intrinsic::cttz:
-        return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
-      case Intrinsic::ctlz:
-        return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
       case Intrinsic::convert_from_fp16: {
        APFloat Val(Op->getValue());
@@ -1337,16 +1346,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
     if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
       if (Op2->getType() != Op1->getType())
         return 0;
-
+
       double Op2V = Ty->isFloatTy() ?
                       (double)Op2->getValueAPF().convertToFloat():
                       Op2->getValueAPF().convertToDouble();
 
-      if (Name == "pow")
+      if (F->getIntrinsicID() == Intrinsic::pow) {
         return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
-      if (Name == "fmod")
+      }
+      if (!TLI)
+        return 0;
+      if (Name == "pow" && TLI->has(LibFunc::pow))
+        return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+      if (Name == "fmod" && TLI->has(LibFunc::fmod))
         return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
-      if (Name == "atan2")
+      if (Name == "atan2" && TLI->has(LibFunc::atan2))
         return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
     } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
       if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
@@ -1361,7 +1375,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
     return 0;
   }
 
-
   if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
     if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
       switch (F->getIntrinsicID()) {
@@ -1401,6 +1414,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
        };
        return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
       }
+      case Intrinsic::cttz:
+        // FIXME: This should check for Op2 == 1, and become unreachable if
+        // Op1 == 0.
+        return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
+      case Intrinsic::ctlz:
+        // FIXME: This should check for Op2 == 1, and become unreachable if
+        // Op1 == 0.
+        return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
      }
    }
 
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ed3e8f4..85dcc46 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -777,7 +777,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
     ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
-    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    NULL,
     ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
     ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
     Fn,
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index eae83fd..8ffef29 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMipa
   GlobalsModRef.cpp
   IPA.cpp
   )
-
-add_llvm_library_dependencies(LLVMipa
-  LLVMAnalysis
-  LLVMCore
-  LLVMSupport
-  )
diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/Analysis/IPA/LLVMBuild.txt
index fb16278..980e918 100644
--- a/lib/Analysis/IPA/LLVMBuild.txt
+++ b/lib/Analysis/IPA/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = IPA
 parent = Libraries
 library_name = ipa
 required_libraries = Analysis Core Support
-
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 2f41f72..f1cfd6c 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -38,22 +38,25 @@ STATISTIC(NumFactor , "Number of factorizations");
 STATISTIC(NumReassoc, "Number of reassociations");
 
 static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
-                              const DominatorTree *, unsigned);
+                              const TargetLibraryInfo *, const DominatorTree *,
+                              unsigned);
 static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
-                            const DominatorTree *, unsigned);
+                            const TargetLibraryInfo *, const DominatorTree *,
+                            unsigned);
 static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
-                              const DominatorTree *, unsigned);
+                              const TargetLibraryInfo *, const DominatorTree *,
+                              unsigned);
 static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
-                             const DominatorTree *, unsigned);
+                             const TargetLibraryInfo *, const DominatorTree *,
+                             unsigned);
 static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
-                              const DominatorTree *, unsigned);
+                              const TargetLibraryInfo *, const DominatorTree *,
+                              unsigned);
 
 /// getFalse - For a boolean type, or a vector of boolean type, return false, or
 /// a vector with every element false, as appropriate for the type.
 static Constant *getFalse(Type *Ty) {
-  assert((Ty->isIntegerTy(1) ||
-          (Ty->isVectorTy() &&
-           cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+  assert(Ty->getScalarType()->isIntegerTy(1) &&
         "Expected i1 type or a vector of i1!");
   return Constant::getNullValue(Ty);
 }
@@ -61,9 +64,7 @@ static Constant *getFalse(Type *Ty) {
 /// getTrue - For a boolean type, or a vector of boolean type, return true, or
 /// a vector with every element true, as appropriate for the type.
 static Constant *getTrue(Type *Ty) {
-  assert((Ty->isIntegerTy(1) ||
-          (Ty->isVectorTy() &&
-           cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+  assert(Ty->getScalarType()->isIntegerTy(1) &&
         "Expected i1 type or a vector of i1!");
   return Constant::getAllOnesValue(Ty);
 }
@@ -109,7 +110,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
 /// Returns the simplified value, or null if no simplification was performed.
 static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                           unsigned OpcToExpand, const TargetData *TD,
-                          const DominatorTree *DT, unsigned MaxRecurse) {
+                          const TargetLibraryInfo *TLI, const DominatorTree *DT,
+                          unsigned MaxRecurse) {
   Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
@@ -121,8 +123,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
       // It does!  Try turning it into "(A op C) op' (B op C)".
       Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
       // Do "A op C" and "B op C" both simplify?
-      if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
-        if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+      if (Value *L = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
          // They do! Return "L op' R" if it simplifies or is already available.
          // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
          if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
@@ -131,7 +133,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
            return LHS;
          }
          // Otherwise return "L op' R" if it simplifies.
-          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
                                        MaxRecurse)) {
            ++NumExpand;
            return V;
@@ -145,8 +147,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
       // It does!  Try turning it into "(A op B) op' (A op C)".
       Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
       // Do "A op B" and "A op C" both simplify?
-      if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
-        if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+      if (Value *L = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse)) {
          // They do! Return "L op' R" if it simplifies or is already available.
          // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
          if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
@@ -155,7 +157,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
            return RHS;
          }
          // Otherwise return "L op' R" if it simplifies.
-          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
                                        MaxRecurse)) {
            ++NumExpand;
            return V;
@@ -171,8 +173,10 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 /// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
 /// Returns the simplified value, or null if no simplification was performed.
 static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                             unsigned OpcToExtract, const TargetData *TD,
-                             const DominatorTree *DT, unsigned MaxRecurse) {
+                             unsigned OpcToExtract, const TargetData *TD,
+                             const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT,
+                             unsigned MaxRecurse) {
   Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
@@ -196,7 +200,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
     Value *DD = A == C ? D : C;
     // Form "A op' (B op DD)" if it simplifies completely.
     // Does "B op DD" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, TLI, DT, MaxRecurse)) {
       // It does!  Return "A op' V" if it simplifies or is already available.
       // If V equals B then "A op' V" is just the LHS.  If V equals DD then
       // "A op' V" is just the RHS.
@@ -205,7 +209,8 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
       return V == B ? LHS : RHS;
     }
     // Otherwise return "A op' V" if it simplifies.
-    if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+    if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, TLI, DT,
+                                 MaxRecurse)) {
       ++NumFactor;
       return W;
     }
@@ -219,7 +224,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
     Value *CC = B == D ? C : D;
     // Form "(A op CC) op' B" if it simplifies completely..
     // Does "A op CC" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, TLI, DT, MaxRecurse)) {
       // It does!  Return "V op' B" if it simplifies or is already available.
       // If V equals A then "V op' B" is just the LHS.  If V equals CC then
       // "V op' B" is just the RHS.
@@ -228,7 +233,8 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
       return V == A ? LHS : RHS;
     }
     // Otherwise return "V op' B" if it simplifies.
-    if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+    if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, TLI, DT,
+                                 MaxRecurse)) {
       ++NumFactor;
       return W;
     }
@@ -242,6 +248,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 /// operations.  Returns the simpler value, or null if none was found.
 static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
                                        const TargetData *TD,
+                                       const TargetLibraryInfo *TLI,
                                        const DominatorTree *DT,
                                        unsigned MaxRecurse) {
   Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
@@ -261,12 +268,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
       Value *C = RHS;
 
       // Does "B op C" simplify?
-      if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+      if (Value *V = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
        // It does!  Return "A op V" if it simplifies or is already available.
        // If V equals B then "A op V" is just the LHS.
        if (V == B) return LHS;
        // Otherwise return "A op V" if it simplifies.
-        if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+        if (Value *W = SimplifyBinOp(Opcode, A, V, TD, TLI, DT, MaxRecurse)) {
          ++NumReassoc;
          return W;
        }
@@ -280,12 +287,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
      Value *C = Op1->getOperand(1);
 
      // Does "A op B" simplify?
-      if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+      if (Value *V = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse)) {
        // It does!  Return "V op C" if it simplifies or is already available.
        // If V equals B then "V op C" is just the RHS.
        if (V == B) return RHS;
        // Otherwise return "V op C" if it simplifies.
-        if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+        if (Value *W = SimplifyBinOp(Opcode, V, C, TD, TLI, DT, MaxRecurse)) {
          ++NumReassoc;
          return W;
        }
@@ -303,12 +310,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
      Value *C = RHS;
 
      // Does "C op A" simplify?
-      if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+      if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
        // It does!  Return "V op B" if it simplifies or is already available.
        // If V equals A then "V op B" is just the LHS.
        if (V == A) return LHS;
        // Otherwise return "V op B" if it simplifies.
-        if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+        if (Value *W = SimplifyBinOp(Opcode, V, B, TD, TLI, DT, MaxRecurse)) {
          ++NumReassoc;
          return W;
        }
@@ -322,12 +329,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
      Value *C = Op1->getOperand(1);
 
      // Does "C op A" simplify?
-      if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+      if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
        // It does!  Return "B op V" if it simplifies or is already available.
        // If V equals C then "B op V" is just the RHS.
        if (V == C) return RHS;
        // Otherwise return "B op V" if it simplifies.
-        if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+        if (Value *W = SimplifyBinOp(Opcode, B, V, TD, TLI, DT, MaxRecurse)) {
          ++NumReassoc;
          return W;
        }
@@ -343,6 +350,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
 /// Returns the common value if so, otherwise returns null.
 static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
                                     const TargetData *TD,
+                                    const TargetLibraryInfo *TLI,
                                     const DominatorTree *DT,
                                     unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
@@ -361,11 +369,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
   Value *TV;
   Value *FV;
   if (SI == LHS) {
-    TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
-    FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+    TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, TLI, DT, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, TLI, DT, MaxRecurse);
   } else {
-    TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
-    FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+    TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, TLI, DT, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, TLI, DT, MaxRecurse);
   }
 
   // If they simplified to the same value, then return the common value.
@@ -417,6 +425,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
 /// null.
 static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
                                   Value *RHS, const TargetData *TD,
+                                  const TargetLibraryInfo *TLI,
                                   const DominatorTree *DT,
                                   unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
@@ -436,7 +445,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
 
   // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
   // Does "cmp TV, RHS" simplify?
-  Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, DT, MaxRecurse);
+  Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, TLI, DT, MaxRecurse);
   if (TCmp == Cond) {
     // It not only simplified, it simplified to the select condition.  Replace
     // it with 'true'.
@@ -450,7 +459,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
   }
 
   // Does "cmp FV, RHS" simplify?
-  Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, DT, MaxRecurse);
+  Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, TLI, DT, MaxRecurse);
   if (FCmp == Cond) {
     // It not only simplified, it simplified to the select condition.  Replace
     // it with 'false'.
@@ -471,19 +480,19 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
   // is equal to "Cond && TCmp".  This also catches the case when the false
   // value simplified to false and the true value to true, returning "Cond".
   if (match(FCmp, m_Zero()))
-    if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+    if (Value *V = SimplifyAndInst(Cond, TCmp, TD, TLI, DT, MaxRecurse))
       return V;
   // If the true value simplified to true, then the result of the compare
   // is equal to "Cond || FCmp".
   if (match(TCmp, m_One()))
-    if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+    if (Value *V = SimplifyOrInst(Cond, FCmp, TD, TLI, DT, MaxRecurse))
       return V;
   // Finally, if the false value simplified to true and the true value to
   // false, then the result of the compare is equal to "!Cond".
   if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
     if (Value *V =
         SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
-                        TD, DT, MaxRecurse))
+                        TD, TLI, DT, MaxRecurse))
       return V;
 
   return 0;
@@ -494,7 +503,9 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
 /// it on the incoming phi values yields the same result for every value.  If so
 /// returns the common value, otherwise returns null.
 static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
-                                 const TargetData *TD, const DominatorTree *DT,
+                                 const TargetData *TD,
+                                 const TargetLibraryInfo *TLI,
+                                 const DominatorTree *DT,
                                  unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
     return 0;
@@ -521,8 +532,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
     // If the incoming value is the phi node itself, it can safely be skipped.
     if (Incoming == PI) continue;
     Value *V = PI == LHS ?
-      SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
-      SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+      SimplifyBinOp(Opcode, Incoming, RHS, TD, TLI, DT, MaxRecurse) :
+      SimplifyBinOp(Opcode, LHS, Incoming, TD, TLI, DT, MaxRecurse);
     // If the operation failed to simplify, or simplified to a different value
     // to previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
@@ -538,7 +549,9 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
 /// incoming phi values yields the same result every time.  If so returns the
 /// common result, otherwise returns null.
 static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
-                               const TargetData *TD, const DominatorTree *DT,
+                               const TargetData *TD,
+                               const TargetLibraryInfo *TLI,
+                               const DominatorTree *DT,
                                unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
@@ -562,7 +575,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
     Value *Incoming = PI->getIncomingValue(i);
     // If the incoming value is the phi node itself, it can safely be skipped.
     if (Incoming == PI) continue;
-    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, TLI, DT, MaxRecurse);
     // If the operation failed to simplify, or simplified to a different value
     // to previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
@@ -576,13 +589,15 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
 /// SimplifyAddInst - Given operands for an Add, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                              const TargetData *TD, const DominatorTree *DT,
+                              const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT,
                               unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
-                                      Ops, TD);
+                                      Ops, TD, TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -612,17 +627,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 
   /// i1 add -> xor.
   if (MaxRecurse && Op0->getType()->isIntegerTy(1))
-    if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
       return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, TLI, DT,
                                           MaxRecurse))
     return V;
 
   // Mul distributes over Add.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
-                                TD, DT, MaxRecurse))
+                                TD, TLI, DT, MaxRecurse))
     return V;
 
   // Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -638,20 +653,23 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 }
 
 Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const TargetData *TD, const DominatorTree *DT) {
-  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+                             const TargetData *TD, const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifySubInst - Given operands for a Sub, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                              const TargetData *TD, const DominatorTree *DT,
+                              const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT,
                               unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0))
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
-                                      Ops, TD);
+                                      Ops, TD, TLI);
     }
 
   // X - undef -> undef
@@ -679,18 +697,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   Value *Y = 0, *Z = Op1;
   if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
     // See if "V === Y - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, TLI, DT, MaxRecurse-1))
       // It does!  Now see if "X + V" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+      if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, TLI, DT,
                                    MaxRecurse-1)) {
        // It does, we successfully reassociated!
        ++NumReassoc;
        return W;
      }
    // See if "V === X - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
      // It does!  Now see if "Y + V" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+      if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, TLI, DT,
                                    MaxRecurse-1)) {
        // It does, we successfully reassociated!
        ++NumReassoc;
@@ -703,18 +721,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   X = Op0;
   if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
     // See if "V === X - Y" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, TLI, DT, MaxRecurse-1))
       // It does!  Now see if "V - Z" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, TLI, DT,
                                    MaxRecurse-1)) {
        // It does, we successfully reassociated!
        ++NumReassoc;
        return W;
      }
    // See if "V === X - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
      // It does!  Now see if "V - Y" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, TLI, DT,
                                    MaxRecurse-1)) {
        // It does, we successfully reassociated!
        ++NumReassoc;
@@ -727,9 +745,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   Z = Op0;
   if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
     // See if "V === Z - X" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, TLI, DT, MaxRecurse-1))
       // It does!  Now see if "V + Y" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+      if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, TLI, DT,
                                    MaxRecurse-1)) {
        // It does, we successfully reassociated!
        ++NumReassoc;
@@ -738,12 +756,12 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 
   // Mul distributes over Sub.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
-                                TD, DT, MaxRecurse))
+                                TD, TLI, DT, MaxRecurse))
     return V;
 
   // i1 sub -> xor.
   if (MaxRecurse && Op0->getType()->isIntegerTy(1))
-    if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
       return V;
 
   // Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -759,19 +777,22 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 }
 
 Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const TargetData *TD, const DominatorTree *DT) {
-  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+                             const TargetData *TD,
+                             const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyMulInst - Given operands for a Mul, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
                               const DominatorTree *DT, unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
-                                      Ops, TD);
+                                      Ops, TD, TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -802,30 +823,30 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
 
   // i1 mul -> and.
   if (MaxRecurse && Op0->getType()->isIntegerTy(1))
-    if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+    if (Value *V = SimplifyAndInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
       return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, TLI, DT,
                                           MaxRecurse))
     return V;
 
   // Mul distributes over Add.  Try some generic simplifications based on this.
   if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
-                             TD, DT, MaxRecurse))
+                             TD, TLI, DT, MaxRecurse))
     return V;
 
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+    if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, TLI, DT,
                                          MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+    if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, TLI, DT,
                                       MaxRecurse))
       return V;
 
@@ -833,19 +854,20 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
 }
 
 Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifyMulInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
-                          const TargetData *TD, const DominatorTree *DT,
-                          unsigned MaxRecurse) {
+                          const TargetData *TD, const TargetLibraryInfo *TLI,
+                          const DominatorTree *DT, unsigned MaxRecurse) {
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
     }
   }
 
@@ -898,13 +920,15 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT,
+                                         MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT,
+                                      MaxRecurse))
       return V;
 
   return 0;
@@ -913,34 +937,41 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
 /// SimplifySDivInst - Given operands for an SDiv, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const TargetLibraryInfo *TLI,
                                const DominatorTree *DT, unsigned MaxRecurse) {
-  if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+  if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, TLI, DT,
+                             MaxRecurse))
     return V;
 
   return 0;
 }
 
 Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifySDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyUDivInst - Given operands for a UDiv, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const TargetLibraryInfo *TLI,
                                const DominatorTree *DT, unsigned MaxRecurse) {
-  if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+  if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, TLI, DT,
+                             MaxRecurse))
     return V;
 
   return 0;
 }
 
 Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifyUDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+                               const TargetLibraryInfo *,
                                const DominatorTree *, unsigned) {
   // undef / X -> undef    (the undef could be a snan).
   if (match(Op0, m_Undef()))
@@ -954,19 +985,20 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
 }
 
 Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifyFDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyRem - Given operands for an SRem or URem, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
-                          const TargetData *TD, const DominatorTree *DT,
-                          unsigned MaxRecurse) {
+                          const TargetData *TD, const TargetLibraryInfo *TLI,
+                          const DominatorTree *DT, unsigned MaxRecurse) {
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
     }
   }
 
@@ -1001,13 +1033,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
      return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
      return V;
 
   return 0;
@@ -1016,35 +1048,43 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
 /// SimplifySRemInst - Given operands for an SRem, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
-                               const DominatorTree *DT, unsigned MaxRecurse) {
-  if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+                               const TargetLibraryInfo *TLI,
+                               const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, TLI, DT, MaxRecurse))
     return V;
 
   return 0;
 }
 
 Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifySRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyURemInst - Given operands for a URem, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
-                               const DominatorTree *DT, unsigned MaxRecurse) {
-  if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+                               const TargetLibraryInfo *TLI,
+                               const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, TLI, DT, MaxRecurse))
     return V;
 
   return 0;
 }
 
 Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifyURemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
-                               const DominatorTree *, unsigned) {
+                               const TargetLibraryInfo *,
+                               const DominatorTree *,
+                               unsigned) {
   // undef % X -> undef    (the undef could be a snan).
   if (match(Op0, m_Undef()))
     return Op0;
@@ -1057,19 +1097,20 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
 }
 
 Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifyFRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
-                            const TargetData *TD, const DominatorTree *DT,
-                            unsigned MaxRecurse) {
+                            const TargetData *TD, const TargetLibraryInfo *TLI,
+                            const DominatorTree *DT, unsigned MaxRecurse) {
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
    }
  }
 
@@ -1094,13 +1135,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
      return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
      return V;
 
   return 0;
@@ -1109,9 +1150,10 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
 /// SimplifyShlInst - Given operands for an Shl, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                              const TargetData *TD, const DominatorTree *DT,
-                              unsigned MaxRecurse) {
-  if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+                              const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, TLI, DT, MaxRecurse))
     return V;
 
   // undef << X -> 0
@@ -1127,16 +1169,19 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 }
 
 Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const TargetData *TD, const DominatorTree *DT) {
-  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+                             const TargetData *TD, const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyLShrInst - Given operands for an LShr, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
-                               const TargetData *TD, const DominatorTree *DT,
+                               const TargetData *TD,
+                               const TargetLibraryInfo *TLI,
+                               const DominatorTree *DT,
                                unsigned MaxRecurse) {
-  if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+  if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
     return V;
 
   // undef >>l X -> 0
@@ -1153,16 +1198,20 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
 }
 
 Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
-                              const TargetData *TD, const DominatorTree *DT) {
-  return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+                              const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT) {
+  return ::SimplifyLShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyAShrInst - Given operands for an AShr, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
-                               const TargetData *TD, const DominatorTree *DT,
+                               const TargetData *TD,
+                               const TargetLibraryInfo *TLI,
+                               const DominatorTree *DT,
                                unsigned MaxRecurse) {
-  if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+  if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
     return V;
 
   // all ones >>a X -> all ones
@@ -1183,19 +1232,23 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
 }
 
 Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
-                              const TargetData *TD, const DominatorTree *DT) {
-  return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+                              const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT) {
+  return ::SimplifyAShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyAndInst - Given operands for an And, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
-                              const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT,
+                              unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
-                                      Ops, TD);
+                                      Ops, TD, TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1244,36 +1297,36 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
   }
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
-                                          MaxRecurse))
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, TLI,
+                                          DT, MaxRecurse))
     return V;
 
   // And distributes over Or.  Try some generic simplifications based on this.
   if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
-                             TD, DT, MaxRecurse))
+                             TD, TLI, DT, MaxRecurse))
     return V;
 
   // And distributes over Xor.  Try some generic simplifications based on this.
   if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
-                             TD, DT, MaxRecurse))
+                             TD, TLI, DT, MaxRecurse))
    return V;
 
   // Or distributes over And.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
-                                TD, DT, MaxRecurse))
+                                TD, TLI, DT, MaxRecurse))
    return V;
 
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
-                                         MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, TLI,
+                                         DT, MaxRecurse))
      return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, TLI, DT,
                                       MaxRecurse))
      return V;
 
@@ -1281,19 +1334,21 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
 }
 
 Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+  return ::SimplifyAndInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
 }
 
 /// SimplifyOrInst - Given operands for an Or, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const TargetLibraryInfo *TLI,
                              const DominatorTree *DT, unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
-                                      Ops, TD);
+                                      Ops, TD, TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1343,31 +1398,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
     return Constant::getAllOnesValue(Op0->getType());
 
   // Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT, - MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, TLI, + DT, MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this. - if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, TD, + TLI, DT, MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + TD, TLI, DT, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, TLI, DT, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT, + if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, TLI, DT, MaxRecurse)) return V; @@ -1375,19 +1430,21 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyOrInst(Op0, Op1, TD, TLI, DT, RecursionLimit); } /// SimplifyXorInst - Given operands for a Xor, see if we can /// fold the result. If not, this returns null. static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), - Ops, TD); + Ops, TD, TLI); } // Canonicalize the constant to the RHS. @@ -1412,13 +1469,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, return Constant::getAllOnesValue(Op0->getType()); // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT, - MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, TLI, + DT, MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, - TD, DT, MaxRecurse)) + TD, TLI, DT, MaxRecurse)) return V; // Threading Xor over selects and phi nodes is pointless, so don't bother. 
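The hunks above and below thread the new TargetLibraryInfo parameter through every InstructionSimplify entry point, always between the TargetData and DominatorTree arguments. A minimal sketch of a client written against the updated API follows; the pass itself is hypothetical and only the SimplifyInstruction signature is taken from this patch (the updated headers are not shown in this excerpt):

    // Hypothetical demo pass, not part of this patch. Assumes the updated
    // InstructionSimplify.h from this change: TargetLibraryInfo is a required
    // analysis here, while TargetData and DominatorTree remain optional.
    #include "llvm/Function.h"
    #include "llvm/Pass.h"
    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Support/InstIterator.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    namespace {
    struct SimplifyDemo : public FunctionPass {
      static char ID;
      SimplifyDemo() : FunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<TargetLibraryInfo>(); // the dependency this patch adds
      }

      virtual bool runOnFunction(Function &F) {
        const TargetData *TD = getAnalysisIfAvailable<TargetData>();
        const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
        const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
        bool Changed = false;
        for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
          // Every Simplify* entry point now takes TLI between TD and DT.
          if (Value *V = SimplifyInstruction(&*I, TD, TLI, DT)) {
            I->replaceAllUsesWith(V);
            Changed = true;
          }
        return Changed;
      }
    };
    }
    char SimplifyDemo::ID = 0;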
@@ -1434,8 +1491,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, } Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit); + return ::SimplifyXorInst(Op0, Op1, TD, TLI, DT, RecursionLimit); } static Type *GetCompareTy(Value *Op) { @@ -1465,14 +1523,16 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -1489,8 +1549,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); // Special case logic when the operands have i1 type. - if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() && - cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) { + if (OpTy->getScalarType()->isIntegerTy(1)) { switch (Pred) { default: break; case ICmpInst::ICMP_EQ: @@ -1671,13 +1730,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, ConstantExpr::getIntToPtr(RHSC, SrcTy), - TD, DT, MaxRecurse-1)) + TD, TLI, DT, MaxRecurse-1)) return V; } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { if (RI->getOperand(0)->getType() == SrcTy) // Compare without the cast. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, DT, MaxRecurse-1)) + TD, TLI, DT, MaxRecurse-1)) return V; } } @@ -1689,7 +1748,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that signed predicates become unsigned. if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, RI->getOperand(0), TD, DT, + SrcOp, RI->getOperand(0), TD, TLI, DT, MaxRecurse-1)) return V; } @@ -1705,7 +1764,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // also a case of comparing two zero-extended values. if (RExt == CI && MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, Trunc, TD, DT, MaxRecurse-1)) + SrcOp, Trunc, TD, TLI, DT, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit @@ -1750,7 +1809,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that the predicate does not change. 
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, DT, MaxRecurse-1)) + TD, TLI, DT, MaxRecurse-1)) return V; } // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended @@ -1764,7 +1823,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the re-extended constant didn't change then this is effectively // also a case of comparing two sign-extended values. if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT, + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, TLI, DT, MaxRecurse-1)) return V; @@ -1800,7 +1859,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, Constant::getNullValue(SrcTy), - TD, DT, MaxRecurse-1)) + TD, TLI, DT, MaxRecurse-1)) return V; break; case ICmpInst::ICMP_ULT: @@ -1809,7 +1868,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, Constant::getNullValue(SrcTy), - TD, DT, MaxRecurse-1)) + TD, TLI, DT, MaxRecurse-1)) return V; break; } @@ -1843,14 +1902,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if ((A == RHS || B == RHS) && NoLHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, Constant::getNullValue(RHS->getType()), - TD, DT, MaxRecurse-1)) + TD, TLI, DT, MaxRecurse-1)) return V; // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. if ((C == LHS || D == LHS) && NoRHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), - C == LHS ? D : C, TD, DT, MaxRecurse-1)) + C == LHS ? D : C, TD, TLI, DT, MaxRecurse-1)) return V; // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. @@ -1859,7 +1918,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Determine Y and Z in the form icmp (X+Y), (X+Z). Value *Y = (A == C || A == D) ? B : A; Value *Z = (C == A || C == B) ? D : C; - if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, TLI, DT, MaxRecurse-1)) return V; } } @@ -1942,7 +2001,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, DT, MaxRecurse-1)) + RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1)) return V; break; case Instruction::Shl: { @@ -1953,7 +2012,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!NSW && ICmpInst::isSigned(Pred)) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, DT, MaxRecurse-1)) + RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1)) return V; break; } @@ -2007,7 +2066,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -2021,7 +2080,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. 
if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1)) return V; break; } @@ -2077,7 +2136,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -2091,7 +2150,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1)) return V; break; } @@ -2151,34 +2210,38 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit); } /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -2246,21 +2309,23 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. 
if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) return V; return 0; } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit); } /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold @@ -2291,10 +2356,13 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, - const TargetData *TD, const DominatorTree *) { +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, + const DominatorTree *) { // The type of the GEP pointer operand. - PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()); + PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType()); + // The GEP pointer operand is not a pointer, it's a vector of pointers. + if (!PtrTy) + return 0; // getelementptr P -> P. if (Ops.size() == 1) @@ -2392,62 +2460,76 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { return CommonValue; } - //=== Helper functions for higher up the class hierarchy. /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. If not, this returns null. static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); + TD, TLI, DT, MaxRecurse); case Instruction::Sub: return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); - case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse); - case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); + TD, TLI, DT, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, DT, MaxRecurse); + TD, TLI, DT, MaxRecurse); case 
Instruction::LShr: - return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); + return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT, + MaxRecurse); case Instruction::AShr: - return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse); - case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse); - case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse); - case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse); + return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT, + MaxRecurse); + case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, TLI, DT, + MaxRecurse); + case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, TLI, DT, + MaxRecurse); default: if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Constant *COps[] = {CLHS, CRHS}; - return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD); + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD, TLI); } // If the operation is associative, try some generic simplifications. if (Instruction::isAssociative(Opcode)) - if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT, + if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, TLI, DT, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT, + if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, TLI, DT, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, TLI, DT, + MaxRecurse)) return V; return 0; @@ -2455,23 +2537,27 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyBinOp(Opcode, LHS, RHS, TD, TLI, DT, RecursionLimit); } /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. 
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) - return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse); + return SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, const DominatorTree *DT) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); + const TargetData *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit); } static Value *SimplifyCallInst(CallInst *CI) { @@ -2485,78 +2571,79 @@ static Value *SimplifyCallInst(CallInst *CI) { /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { Value *Result; switch (I->getOpcode()) { default: - Result = ConstantFoldInstruction(I, TD); + Result = ConstantFoldInstruction(I, TD, TLI); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::Mul: - Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FDiv: - Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FRem: - Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), cast<BinaryOperator>(I)->hasNoUnsignedWrap(), - TD, DT); + TD, TLI, DT); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->isExact(), - TD, DT); + TD, TLI, DT); break; case Instruction::AShr: Result = 
SimplifyAShrInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->isExact(), - TD, DT); + TD, TLI, DT); break; case Instruction::And: - Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Or: - Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Xor: - Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::ICmp: Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, DT); + I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::FCmp: Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), TD, DT); + I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), @@ -2596,6 +2683,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, /// void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, const TargetData *TD, + const TargetLibraryInfo *TLI, const DominatorTree *DT) { assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!"); @@ -2620,12 +2708,12 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To, // SimplifyInstruction. AssertingVH<> UserHandle(User); - SimplifiedVal = SimplifyInstruction(User, TD, DT); + SimplifiedVal = SimplifyInstruction(User, TD, TLI, DT); if (SimplifiedVal == 0) continue; } // Recursively simplify this user to the new value. - ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT); + ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, TLI, DT); From = dyn_cast_or_null<Instruction>((Value*)FromHandle); To = ToHandle; diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index 92f199b..a8a8079 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -15,9 +15,11 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = IPA + [component_0] type = Library name = Analysis parent = Libraries required_libraries = Core Support Target - diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index f80595c..d27d911 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" @@ -33,7 +34,10 @@ using namespace llvm; char LazyValueInfo::ID = 0; -INITIALIZE_PASS(LazyValueInfo, "lazy-value-info", +INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", + "Lazy Value Information Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) namespace llvm { @@ -61,10 +65,10 @@ class LVILatticeVal { constant, /// notconstant - This Value is known to not have the specified value. notconstant, - + /// constantrange - The Value falls within this range. constantrange, - + /// overdefined - This value is not known to be constant, and we know that /// it has a value. 
overdefined @@ -207,7 +211,7 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use TargetData for smarter constant folding. + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. if (ConstantInt *Res = dyn_cast<ConstantInt>( ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, getConstant(), @@ -233,7 +237,7 @@ public: // Unless we can prove that the two Constants are different, we must // move to overdefined. - // FIXME: use TargetData for smarter constant folding. + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. if (ConstantInt *Res = dyn_cast<ConstantInt>( ConstantFoldCompareInstOperands(CmpInst::ICMP_NE, getNotConstant(), @@ -367,7 +371,11 @@ namespace { /// for cache updating. typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; DenseSet<OverDefinedPairTy> OverDefinedCache; - + + /// SeenBlocks - Keep track of all blocks that we have ever seen, so we + /// don't spend time removing unused blocks from our caches. + DenseSet<AssertingVH<BasicBlock> > SeenBlocks; + /// BlockValueStack - This stack holds the state of the value solver /// during a query. It basically emulates the callstack of the naive /// recursive value lookup process. @@ -438,6 +446,7 @@ namespace { /// clear - Empty the cache. void clear() { + SeenBlocks.clear(); ValueCache.clear(); OverDefinedCache.clear(); } @@ -466,6 +475,12 @@ void LVIValueHandle::deleted() { } void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { + // Shortcut if we have never seen this block. + DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB); + if (I == SeenBlocks.end()) + return; + SeenBlocks.erase(I); + SmallVector<OverDefinedPairTy, 4> ToErase; for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) { @@ -505,6 +520,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { if (Constant *VC = dyn_cast<Constant>(Val)) return LVILatticeVal::get(VC); + SeenBlocks.insert(BB); return lookup(Val)[BB]; } @@ -513,6 +529,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { return true; ValueCacheEntryTy &Cache = lookup(Val); + SeenBlocks.insert(BB); LVILatticeVal &BBLV = Cache[BB]; // OverDefinedCacheUpdater is a helper object that will update @@ -1007,12 +1024,19 @@ static LazyValueInfoCache &getCache(void *&PImpl) { bool LazyValueInfo::runOnFunction(Function &F) { if (PImpl) getCache(PImpl).clear(); - + TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + // Fully lazy. return false; } +void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfo>(); +} + void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { @@ -1061,7 +1085,8 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, // If we know the value is a constant, evaluate the conditional. Constant *Res = 0; if (Result.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD); + Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD, + TLI); if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res)) return ResCI->isZero() ? False : True; return Unknown; @@ -1102,13 +1127,15 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. 
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD); + Result.getNotConstant(), C, TD, + TLI); if (Res->isNullValue()) return False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, TD); + Result.getNotConstant(), C, TD, + TLI); if (Res->isNullValue()) return True; } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 38d677d..971065f 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -44,6 +44,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/IntrinsicInst.h" @@ -103,6 +104,7 @@ namespace { AliasAnalysis *AA; DominatorTree *DT; TargetData *TD; + TargetLibraryInfo *TLI; std::string Messages; raw_string_ostream MessagesStr; @@ -117,6 +119,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); AU.addRequired<DominatorTree>(); } virtual void print(raw_ostream &O, const Module *M) const {} @@ -149,6 +152,7 @@ namespace { char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", @@ -174,6 +178,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); visit(F); dbgs() << MessagesStr.str(); Messages.clear(); @@ -614,10 +619,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast<Instruction>(V)) { - if (Value *W = SimplifyInstruction(Inst, TD, DT)) + if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT)) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (Value *W = ConstantFoldConstantExpression(CE, TD)) + if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI)) if (W != V) return findValueImpl(W, OffsetOk, Visited); } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index c7833bf..858cc64 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" @@ -95,7 +96,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, // Test if the value is already loop-invariant. if (isLoopInvariant(I)) return true; - if (!I->isSafeToSpeculativelyExecute()) + if (!isSafeToSpeculativelyExecute(I)) return false; if (I->mayReadFromMemory()) return false; @@ -165,99 +166,6 @@ PHINode *Loop::getCanonicalInductionVariable() const { return 0; } -/// getTripCount - Return a loop-invariant LLVM value indicating the number of -/// times the loop will be executed. Note that this means that the backedge -/// of the loop executes N-1 times. If the trip-count cannot be determined, -/// this returns null. 
-/// -/// The IndVarSimplify pass transforms loops to have a form that this -/// function easily understands. -/// -Value *Loop::getTripCount() const { - // Canonical loops will end with a 'cmp ne I, V', where I is the incremented - // canonical induction variable and V is the trip count of the loop. - PHINode *IV = getCanonicalInductionVariable(); - if (IV == 0 || IV->getNumIncomingValues() != 2) return 0; - - bool P0InLoop = contains(IV->getIncomingBlock(0)); - Value *Inc = IV->getIncomingValue(!P0InLoop); - BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop); - - if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator())) - if (BI->isConditional()) { - if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) { - if (ICI->getOperand(0) == Inc) { - if (BI->getSuccessor(0) == getHeader()) { - if (ICI->getPredicate() == ICmpInst::ICMP_NE) - return ICI->getOperand(1); - } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { - return ICI->getOperand(1); - } - } - } - } - - return 0; -} - -/// getSmallConstantTripCount - Returns the trip count of this loop as a -/// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// or not constant. Will also return 0 if the trip count is very large -/// (>= 2^32) -unsigned Loop::getSmallConstantTripCount() const { - Value* TripCount = this->getTripCount(); - if (TripCount) { - if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) { - // Guard against huge trip counts. - if (TripCountC->getValue().getActiveBits() <= 32) { - return (unsigned)TripCountC->getZExtValue(); - } - } - } - return 0; -} - -/// getSmallConstantTripMultiple - Returns the largest constant divisor of the -/// trip count of this loop as a normal unsigned value, if possible. This -/// means that the actual trip count is always a multiple of the returned -/// value (don't forget the trip count could very well be zero as well!). -/// -/// Returns 1 if the trip count is unknown or not guaranteed to be the -/// multiple of a constant (which is also the case if the trip count is simply -/// constant, use getSmallConstantTripCount for that case), Will also return 1 -/// if the trip count is very large (>= 2^32). -unsigned Loop::getSmallConstantTripMultiple() const { - Value* TripCount = this->getTripCount(); - // This will hold the ConstantInt result, if any - ConstantInt *Result = NULL; - if (TripCount) { - // See if the trip count is constant itself - Result = dyn_cast<ConstantInt>(TripCount); - // if not, see if it is a multiplication - if (!Result) - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) { - switch (BO->getOpcode()) { - case BinaryOperator::Mul: - Result = dyn_cast<ConstantInt>(BO->getOperand(1)); - break; - case BinaryOperator::Shl: - if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) - if (CI->getValue().getActiveBits() <= 5) - return 1u << CI->getZExtValue(); - break; - default: - break; - } - } - } - // Guard against huge trip counts. 
- if (Result && Result->getValue().getActiveBits() <= 32) { - return (unsigned)Result->getZExtValue(); - } else { - return 1; - } -} - /// isLCSSAForm - Return true if the Loop is in LCSSA form bool Loop::isLCSSAForm(DominatorTree &DT) const { // Sort the blocks vector so that we can use binary search to do quick diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index fde07ea..22414b3 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -130,7 +130,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { AliasAnalysis::Location Loc = AA.getLocation(LI); MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI); } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - if (!LI->isUnordered()) { + if (!SI->isUnordered()) { // FIXME: Handle atomic/volatile stores. Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), static_cast<BasicBlock *>(0))); diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 7e22ddc..80ea219 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" @@ -27,7 +28,7 @@ static bool CanPHITrans(Instruction *Inst) { return true; if (isa<CastInst>(Inst) && - Inst->isSafeToSpeculativelyExecute()) + isSafeToSpeculativelyExecute(Inst)) return true; if (Inst->getOpcode() == Instruction::Add && @@ -186,7 +187,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!Cast->isSafeToSpeculativelyExecute()) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return 0; Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); if (PHIIn == 0) return 0; if (PHIIn == Cast->getOperand(0)) @@ -284,7 +285,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -381,7 +382,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // Handle cast of PHI translatable value. 
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) { - if (!Cast->isSafeToSpeculativelyExecute()) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return 0; Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, NewInsts); if (OpVal == 0) return 0; diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index 379d79c..0cb1588 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -30,7 +30,7 @@ static cl::opt<bool,false> ProfileVerifierDisableAssertions("profile-verifier-noassert", cl::desc("Disable assertions")); -namespace llvm { +namespace { template<class FType, class BType> class ProfileVerifierPassT : public FunctionPass { diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 622b214..daf7742 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -74,6 +74,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" @@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) char ScalarEvolution::ID = 0; @@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const { OS << OpStr; } OS << ")"; + switch (NAry->getSCEVType()) { + case scAddExpr: + case scMulExpr: + if (NAry->getNoWrapFlags(FlagNUW)) + OS << "<nuw>"; + if (NAry->getNoWrapFlags(FlagNSW)) + OS << "<nsw>"; + } return; } case scUDivExpr: { @@ -2581,7 +2591,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2590,7 +2600,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2607,7 +2617,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2617,7 +2627,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if 
(Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -3108,7 +3118,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, TD, DT)) + if (Value *V = SimplifyInstruction(PN, TD, TLI, DT)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3584,6 +3594,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. + // + // Don't apply this instruction's NSW or NUW flags to the new + // expression. The instruction may be guarded by control flow that the + // no-wrap behavior depends on. Non-control-equivalent instructions can be + // mapped to the same SCEV expression, and it would be incorrect to transfer + // NSW/NUW semantics to those operations. SmallVector<const SCEV *, 4> AddOps; AddOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { @@ -3598,16 +3614,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { AddOps.push_back(Op1); } AddOps.push_back(getSCEV(U->getOperand(0))); - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V); - if (OBO->hasNoSignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNSW); - if (OBO->hasNoUnsignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNUW); - return getAddExpr(AddOps, Flags); + return getAddExpr(AddOps); } case Instruction::Mul: { - // See the Add code above. + // Don't transfer NSW/NUW for the same reason as AddExpr. SmallVector<const SCEV *, 4> MulOps; MulOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); @@ -4762,7 +4772,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { /// reason, return null. static Constant *EvaluateExpression(Value *V, const Loop *L, DenseMap<Instruction *, Constant *> &Vals, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Convenient constant check, but redundant for recursive calls. 
if (Constant *C = dyn_cast<Constant>(V)) return C; Instruction *I = dyn_cast<Instruction>(V); @@ -4788,7 +4799,7 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, if (!Operands[i]) return 0; continue; } - Constant *C = EvaluateExpression(Operand, L, Vals, TD); + Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI); Vals[Operand] = C; if (!C) return 0; Operands[i] = C; @@ -4796,12 +4807,13 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, if (CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], TD); + Operands[1], TD, TLI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!LI->isVolatile()) return ConstantFoldLoadFromConstPtr(Operands[0], TD); } - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD, + TLI); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -4856,7 +4868,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap<Instruction *, Constant *> NextIterVals; - Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, + TLI); if (NextPHI == 0) return 0; // Couldn't evaluate! NextIterVals[PN] = NextPHI; @@ -4881,7 +4894,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); } if (NextPHI != I->second) StoppedEvolving = false; @@ -4936,8 +4949,8 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = - dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, - CurrentIterVals, TD)); + dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals, + TD, TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4967,7 +4980,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, if (NextPHI) continue; // Already computed! 
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); } CurrentIterVals.swap(NextIterVals); } @@ -5159,13 +5172,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { Constant *C = 0; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD); + Operands[0], Operands[1], TD, + TLI); else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) { if (!LI->isVolatile()) C = ConstantFoldLoadFromConstPtr(Operands[0], TD); } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Operands, TD); + Operands, TD, TLI); if (!C) return V; return getSCEV(C); } @@ -6552,6 +6566,7 @@ bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis<LoopInfo>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTree>(); return false; } @@ -6588,6 +6603,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive<LoopInfo>(); AU.addRequiredTransitive<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); } bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 47f0f32..f3cf549 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -73,9 +73,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { "InsertNoopCastOfTo cannot change sizes!"); // Short-circuit unnecessary bitcasts. - if (Op == Instruction::BitCast && V->getType() == Ty) - return V; - + if (Op == Instruction::BitCast) { + if (V->getType() == Ty) + return V; + if (CastInst *CI = dyn_cast<CastInst>(V)) { + if (CI->getOperand(0)->getType() == Ty) + return CI->getOperand(0); + } + } // Short-circuit unnecessary inttoptr<->ptrtoint casts. if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { @@ -929,6 +934,36 @@ bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, } } +/// expandIVInc - Expand an IV increment at Builder's current InsertPos. +/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may +/// need to materialize IV increments elsewhere to handle difficult situations. +Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, + Type *ExpandTy, Type *IntTy, + bool useSubtract) { + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (ExpandTy->isPointerTy()) { + PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa<ConstantInt>(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType()); + rememberInstruction(IncV); + } + } else { + IncV = useSubtract ? 
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : + Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); + rememberInstruction(IncV); + } + return IncV; +} + /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. @@ -993,16 +1028,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(), L->getHeader())); - // Expand code for the step value. Insert instructions right before the - // terminator corresponding to the back-edge. Do this before creating the PHI - // so that PHI reuse code doesn't see an incomplete PHI. If the stride is - // negative, insert a sub instead of an add for the increment (unless it's a - // constant, because subtracts of constants are canonicalized to adds). + // Expand code for the step value. Do this before creating the PHI so that PHI + // reuse code doesn't see an incomplete PHI. const SCEV *Step = Normalized->getStepRecurrence(SE); - bool isPointer = ExpandTy->isPointerTy(); - bool isNegative = !isPointer && isNonConstantNegative(Step); - if (isNegative) + // If the stride is negative, insert a sub instead of an add for the increment + // (unless it's a constant, because subtracts of constants are canonicalized + // to adds). + bool useSubtract = !ExpandTy->isPointerTy() && isNonConstantNegative(Step); + if (useSubtract) Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); // Create the PHI. @@ -1023,33 +1058,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, continue; } - // Create a step value and add it to the PHI. If IVIncInsertLoop is - // non-null and equal to the addrec's loop, insert the instructions - // at IVIncInsertPos. + // Create a step value and add it to the PHI. + // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the + // instructions at IVIncInsertPos. Instruction *InsertPos = L == IVIncInsertLoop ? IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); - Value *IncV; - // If the PHI is a pointer, use a GEP, otherwise use an add or sub. - if (isPointer) { - PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); - // If the step isn't constant, don't use an implicitly scaled GEP, because - // that would require a multiply inside the loop. - if (!isa<ConstantInt>(StepV)) - GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), - GEPPtrTy->getAddressSpace()); - const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; - IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); - if (IncV->getType() != PN->getType()) { - IncV = Builder.CreateBitCast(IncV, PN->getType()); - rememberInstruction(IncV); - } - } else { - IncV = isNegative ? - Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : - Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); - rememberInstruction(IncV); - } + Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + PN->addIncoming(IncV, Pred); } @@ -1124,10 +1140,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. 
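The hunk that follows replaces expandAddRecExprLiterally's dominance assert with a recovery path. Condensed, the new logic re-expands the step at the loop header, where it is known to dominate, and then emits one extra increment at the non-dominated use; the fragment below paraphrases the patch for orientation and is not additional tree code:

    // When the postinc expansion does not dominate this use, rebuild the
    // post-increment value as PN plus (or minus) one more step, emitted at
    // the use point via the new expandIVInc helper.
    if (isa<Instruction>(Result) &&
        !SE.DT->dominates(cast<Instruction>(Result), Builder.GetInsertPoint())) {
      Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
      Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
    }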
- assert((!isa<Instruction>(Result) || - SE.DT->dominates(cast<Instruction>(Result), - Builder.GetInsertPoint())) && - "postinc expansion does not dominate use"); + if (isa<Instruction>(Result) + && !SE.DT->dominates(cast<Instruction>(Result), + Builder.GetInsertPoint())) { + // The induction variable's postinc expansion does not dominate this use. + // IVUsers tries to prevent this case, so it is rare. However, it can + // happen when an IVUser outside the loop is not dominated by the latch + // block. Adjusting IVIncInsertPos before expansion begins cannot handle + // all cases. Consider a phi outside the loop whose operand is replaced during + // expansion with the value of the postinc user. Without fundamentally + // changing the way postinc users are tracked, the only remedy is + // inserting an extra IV increment. StepV might fold into PostLoopOffset, + // but hopefully expandCodeFor handles that. + bool useSubtract = + !ExpandTy->isPointerTy() && isNonConstantNegative(Step); + if (useSubtract) + Step = SE.getNegativeSCEV(Step); + // Expand the step somewhere that dominates the loop header. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + // Restore the insertion point to the place where the caller has + // determined dominates all uses. + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + } } // Re-apply any non-loop-dominating scale. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 22f1c14..ef19e06 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -63,13 +63,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Mask.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy()) - && "Not integer or pointer type!"); + assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->getScalarType()->isPointerTy()) && + "Not integer or pointer type!"); assert((!TD || TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && + KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && "V, Mask, KnownOne and KnownZero should have same BitWidth"); @@ -103,14 +104,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { - Type *ObjectType = GV->getType()->getElementType(); - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (!GV->isDeclaration() && !GV->mayBeOverridden()) - Align = TD->getPrefTypeAlignment(ObjectType); - else - Align = TD->getABITypeAlignment(ObjectType); + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + Type *ObjectType = GVar->getType()->getElementType(); + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. 
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) + Align = TD->getPreferredAlignment(GVar); + else + Align = TD->getABITypeAlignment(ObjectType); + } } if (Align > 0) KnownZero = Mask & APInt::getLowBitsSet(BitWidth, @@ -1367,6 +1370,8 @@ Value *llvm::isBytewiseValue(Value *V) { return Val; } + + // FIXME: Vector types (e.g., <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>). // Conceptually, we could handle things like: // %a = zext i8 %X to i16 @@ -1555,7 +1560,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range, Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const TargetData &TD) { Operator *PtrOp = dyn_cast<Operator>(Ptr); - if (PtrOp == 0) return Ptr; + if (PtrOp == 0 || Ptr->getType()->isVectorTy()) + return Ptr; // Just look through bitcasts. if (PtrOp->getOpcode() == Instruction::BitCast) @@ -1869,3 +1875,64 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { } return true; } + +bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst, + const TargetData *TD) { + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i))) + if (C->canTrap()) + return false; + + switch (Inst->getOpcode()) { + default: + return true; + case Instruction::UDiv: + case Instruction::URem: + // x / y is undefined if y == 0, but calculations like x / 3 are safe. + return isKnownNonZero(Inst->getOperand(1), TD); + case Instruction::SDiv: + case Instruction::SRem: { + Value *Op = Inst->getOperand(1); + // x / y is undefined if y == 0 + if (!isKnownNonZero(Op, TD)) + return false; + // x / y might be undefined if y == -1 + unsigned BitWidth = getBitWidth(Op->getType(), TD); + if (BitWidth == 0) + return false; + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + ComputeMaskedBits(Op, APInt::getAllOnesValue(BitWidth), + KnownZero, KnownOne, TD); + return !!KnownZero; + } + case Instruction::Load: { + const LoadInst *LI = cast<LoadInst>(Inst); + if (!LI->isUnordered()) + return false; + return LI->getPointerOperand()->isDereferenceablePointer(); + } + case Instruction::Call: + return false; // The called function could have undefined behavior or + // side-effects. + // FIXME: We should special-case some intrinsics (bswap, + // overflow-checking arithmetic, etc.)
+ case Instruction::VAArg: + case Instruction::Alloca: + case Instruction::Invoke: + case Instruction::PHI: + case Instruction::Store: + case Instruction::Ret: + case Instruction::Br: + case Instruction::IndirectBr: + case Instruction::Switch: + case Instruction::Unwind: + case Instruction::Unreachable: + case Instruction::Fence: + case Instruction::LandingPad: + case Instruction::AtomicRMW: + case Instruction::AtomicCmpXchg: + case Instruction::Resume: + return false; // Misc instructions which have effects + } +} diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 8fcc7aa..9ef2943 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -182,11 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where, if (hasSlash || filePath.str().length() > 15) flags |= ArchiveMember::HasLongFilenameFlag; - sys::LLVMFileType type; + sys::fs::file_magic type; if (sys::fs::identify_magic(mbr->path.str(), type)) - type = sys::Unknown_FileType; + type = sys::fs::file_magic::unknown; switch (type) { - case sys::Bitcode_FileType: + case sys::fs::file_magic::bitcode: flags |= ArchiveMember::BitcodeFlag; break; default: diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt index b52974e..7ff478a 100644 --- a/lib/Archive/CMakeLists.txt +++ b/lib/Archive/CMakeLists.txt @@ -3,9 +3,3 @@ add_llvm_library(LLVMArchive ArchiveReader.cpp ArchiveWriter.cpp ) - -add_llvm_library_dependencies(LLVMArchive - LLVMBitReader - LLVMCore - LLVMSupport - ) diff --git a/lib/Archive/LLVMBuild.txt b/lib/Archive/LLVMBuild.txt index 26b7c8e..d68550b 100644 --- a/lib/Archive/LLVMBuild.txt +++ b/lib/Archive/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = Archive parent = Libraries required_libraries = BitReader Core Support - diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt index 7496015..985ebe2 100644 --- a/lib/AsmParser/CMakeLists.txt +++ b/lib/AsmParser/CMakeLists.txt @@ -4,8 +4,3 @@ add_llvm_library(LLVMAsmParser LLParser.cpp Parser.cpp ) - -add_llvm_library_dependencies(LLVMAsmParser - LLVMCore - LLVMSupport - ) diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index cafaab0..4678269 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -120,11 +120,6 @@ bool LLParser::ValidateEndOfModule() { for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove - // Upgrade to new EH scheme. N.B. This will go away in 3.1. - UpgradeExceptionHandling(M); - - // Check debug info intrinsics. 
- CheckDebugInfoIntrinsics(M); return false; } @@ -1069,7 +1064,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst, return TokError("expected metadata after comma"); std::string Name = Lex.getStrVal(); - unsigned MDK = M->getMDKindID(Name.c_str()); + unsigned MDK = M->getMDKindID(Name); Lex.Lex(); MDNode *Node; @@ -1612,7 +1607,8 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) { if ((unsigned)Size != Size) return Error(SizeLoc, "size too large for vector"); if (!VectorType::isValidElementType(EltTy)) - return Error(TypeLoc, "vector element type must be fp or integer"); + return Error(TypeLoc, + "vector element type must be fp, integer or a pointer to these types"); Result = VectorType::get(EltTy, unsigned(Size)); } else { if (!ArrayType::isValidElementType(EltTy)) @@ -1971,9 +1967,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return Error(ID.Loc, "constant vector must not be empty"); if (!Elts[0]->getType()->isIntegerTy() && - !Elts[0]->getType()->isFloatingPointTy()) + !Elts[0]->getType()->isFloatingPointTy() && + !Elts[0]->getType()->isPointerTy()) return Error(FirstEltLoc, - "vector elements must have integer or floating point type"); + "vector elements must have integer, pointer or floating point type"); // Verify that all the vector elements have the same type. for (unsigned i = 1, e = Elts.size(); i != e; ++i) @@ -2165,7 +2162,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); if (!Val0->getType()->isIntOrIntVectorTy() && - !Val0->getType()->isPointerTy()) + !Val0->getType()->getScalarType()->isPointerTy()) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); } @@ -2299,7 +2296,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Opc == Instruction::GetElementPtr) { - if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy()) + if (Elts.size() == 0 || + !Elts[0]->getType()->getScalarType()->isPointerTy()) return Error(ID.Loc, "getelementptr requires pointer operand"); ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); @@ -2953,19 +2951,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_tail: return ParseCall(Inst, PFS, true); // Memory. 
case lltok::kw_alloca: return ParseAlloc(Inst, PFS); - case lltok::kw_load: return ParseLoad(Inst, PFS, false); - case lltok::kw_store: return ParseStore(Inst, PFS, false); + case lltok::kw_load: return ParseLoad(Inst, PFS); + case lltok::kw_store: return ParseStore(Inst, PFS); case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS); case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS); case lltok::kw_fence: return ParseFence(Inst, PFS); - case lltok::kw_volatile: - // For compatibility; canonical location is after load - if (EatIfPresent(lltok::kw_load)) - return ParseLoad(Inst, PFS, true); - else if (EatIfPresent(lltok::kw_store)) - return ParseStore(Inst, PFS, true); - else - return TokError("expected 'load' or 'store'"); case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS); case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS); case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS); @@ -3342,7 +3332,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); if (!LHS->getType()->isIntOrIntVectorTy() && - !LHS->getType()->isPointerTy()) + !LHS->getType()->getScalarType()->isPointerTy()) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); } @@ -3689,10 +3679,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)? /// ::= 'load' 'atomic' 'volatile'? TypeAndValue /// 'singlethread'? AtomicOrdering (',' 'align' i32)? -/// Compatibility: -/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)? -int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, - bool isVolatile) { +int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy Loc; unsigned Alignment = 0; bool AteExtraComma = false; @@ -3701,15 +3688,12 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, SynchronizationScope Scope = CrossThread; if (Lex.getKind() == lltok::kw_atomic) { - if (isVolatile) - return TokError("mixing atomic with old volatile placement"); isAtomic = true; Lex.Lex(); } + bool isVolatile = false; if (Lex.getKind() == lltok::kw_volatile) { - if (isVolatile) - return TokError("duplicate volatile before and after store"); isVolatile = true; Lex.Lex(); } @@ -3736,10 +3720,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, /// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)? /// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue /// 'singlethread'? AtomicOrdering (',' 'align' i32)? -/// Compatibility: -/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)? 
-int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, - bool isVolatile) { +int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Value *Val, *Ptr; LocTy Loc, PtrLoc; unsigned Alignment = 0; bool AteExtraComma = false; @@ -3748,15 +3729,12 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, SynchronizationScope Scope = CrossThread; if (Lex.getKind() == lltok::kw_atomic) { - if (isVolatile) - return TokError("mixing atomic with old volatile placement"); isAtomic = true; Lex.Lex(); } + bool isVolatile = false; if (Lex.getKind() == lltok::kw_volatile) { - if (isVolatile) - return TokError("duplicate volatile before and after store"); isVolatile = true; Lex.Lex(); } @@ -3902,13 +3880,15 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { /// ParseGetElementPtr /// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)* int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { - Value *Ptr, *Val; LocTy Loc, EltLoc; + Value *Ptr = 0; + Value *Val = 0; + LocTy Loc, EltLoc; bool InBounds = EatIfPresent(lltok::kw_inbounds); if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; - if (!Ptr->getType()->isPointerTy()) + if (!Ptr->getType()->getScalarType()->isPointerTy()) return Error(Loc, "base of getelementptr must be a pointer"); SmallVector<Value*, 16> Indices; @@ -3919,11 +3899,23 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { break; } if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; - if (!Val->getType()->isIntegerTy()) + if (!Val->getType()->getScalarType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); + if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy()) + return Error(EltLoc, "getelementptr index type mismatch"); + if (Val->getType()->isVectorTy()) { + unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements(); + unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements(); + if (ValNumEl != PtrNumEl) + return Error(EltLoc, + "getelementptr vector index has a wrong number of elements"); + } Indices.push_back(Val); } + if (Val && Val->getType()->isVectorTy() && Indices.size() != 1) + return Error(EltLoc, "vector getelementptrs must have a single index"); + if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices)) return Error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ptr, Indices); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index cbc3c23..c2537d7 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -363,8 +363,8 @@ namespace llvm { bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS); bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); int ParseAlloc(Instruction *&I, PerFunctionState &PFS); - int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); - int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + int ParseLoad(Instruction *&I, PerFunctionState &PFS); + int ParseStore(Instruction *&I, PerFunctionState &PFS); int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS); int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS); int ParseFence(Instruction *&I, PerFunctionState &PFS); diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt index ad56d4c..3bc31ed 100644 --- a/lib/AsmParser/LLVMBuild.txt +++ b/lib/AsmParser/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = AsmParser parent = Libraries required_libraries = Core Support -
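The vector-GEP validation added to LLParser above reduces to a small predicate over the operand types. As an editorial sketch only (not code from this patch; isValidVectorGEPOperands is a hypothetical helper written against the same era of the LLVM C++ API):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Value.h"

// Hypothetical restatement of the parser's checks: the base must be a pointer
// (or vector of pointers), each index an integer (or vector of integers), any
// vector index must match the base's element count, and a vector GEP is
// limited to a single index.
static bool isValidVectorGEPOperands(llvm::Value *Ptr,
                                     llvm::ArrayRef<llvm::Value *> Indices) {
  using namespace llvm;
  Type *PtrTy = Ptr->getType();
  if (!PtrTy->getScalarType()->isPointerTy())
    return false;
  for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
    Type *IdxTy = Indices[i]->getType();
    if (!IdxTy->getScalarType()->isIntegerTy())
      return false;
    if (IdxTy->isVectorTy() != PtrTy->isVectorTy())
      return false;
    if (IdxTy->isVectorTy() &&
        cast<VectorType>(IdxTy)->getNumElements() !=
        cast<VectorType>(PtrTy)->getNumElements())
      return false;
  }
  return !PtrTy->isVectorTy() || Indices.size() == 1;
}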
diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt index 696440d..af9936b 100644 --- a/lib/Bitcode/LLVMBuild.txt +++ b/lib/Bitcode/LLVMBuild.txt @@ -15,8 +15,10 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = Reader Writer + [component_0] type = Group name = Bitcode parent = Libraries - diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 6ecdbae..d584015 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -403,14 +403,6 @@ Type *BitcodeReader::getTypeByID(unsigned ID) { return TypeList[ID] = StructType::create(Context); } -/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable. -Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) { - if (ID >= TypeList.size()) - TypeList.resize(ID+1); - - return TypeList[ID]; -} - //===----------------------------------------------------------------------===// // Functions for parsing blocks from the bitcode file @@ -747,264 +739,6 @@ bool BitcodeReader::ParseTypeTableBody() { } } -// FIXME: Remove in LLVM 3.1 -bool BitcodeReader::ParseOldTypeTable() { - if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD)) - return Error("Malformed block record"); - - if (!TypeList.empty()) - return Error("Multiple TYPE_BLOCKs found!"); - - - // While horrible, we have no good ordering of types in the bc file. Just - // iteratively parse types out of the bc file in multiple passes until we get - // them all. Do this by saving a cursor for the start of the type block. - BitstreamCursor StartOfTypeBlockCursor(Stream); - - unsigned NumTypesRead = 0; - - SmallVector<uint64_t, 64> Record; -RestartScan: - unsigned NextTypeID = 0; - bool ReadAnyTypes = false; - - // Read all the records for this type table. - while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (NextTypeID != TypeList.size()) - return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD"); - - // If we haven't read all of the types yet, iterate again. - if (NumTypesRead != TypeList.size()) { - // If we didn't successfully read any types in this pass, then we must - // have an unhandled forward reference. - if (!ReadAnyTypes) - return Error("Obsolete bitcode contains unhandled recursive type"); - - Stream = StartOfTypeBlockCursor; - goto RestartScan; - } - - if (Stream.ReadBlockEnd()) - return Error("Error at end of type table block"); - return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } - - // Read a record. - Record.clear(); - Type *ResultTy = 0; - switch (Stream.ReadRecord(Code, Record)) { - default: return Error("unknown type in type table"); - case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] - // TYPE_CODE_NUMENTRY contains a count of the number of types in the - // type list. This allows us to reserve space. 
- if (Record.size() < 1) - return Error("Invalid TYPE_CODE_NUMENTRY record"); - TypeList.resize(Record[0]); - continue; - case bitc::TYPE_CODE_VOID: // VOID - ResultTy = Type::getVoidTy(Context); - break; - case bitc::TYPE_CODE_FLOAT: // FLOAT - ResultTy = Type::getFloatTy(Context); - break; - case bitc::TYPE_CODE_DOUBLE: // DOUBLE - ResultTy = Type::getDoubleTy(Context); - break; - case bitc::TYPE_CODE_X86_FP80: // X86_FP80 - ResultTy = Type::getX86_FP80Ty(Context); - break; - case bitc::TYPE_CODE_FP128: // FP128 - ResultTy = Type::getFP128Ty(Context); - break; - case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 - ResultTy = Type::getPPC_FP128Ty(Context); - break; - case bitc::TYPE_CODE_LABEL: // LABEL - ResultTy = Type::getLabelTy(Context); - break; - case bitc::TYPE_CODE_METADATA: // METADATA - ResultTy = Type::getMetadataTy(Context); - break; - case bitc::TYPE_CODE_X86_MMX: // X86_MMX - ResultTy = Type::getX86_MMXTy(Context); - break; - case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] - if (Record.size() < 1) - return Error("Invalid Integer type record"); - ResultTy = IntegerType::get(Context, Record[0]); - break; - case bitc::TYPE_CODE_OPAQUE: // OPAQUE - if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0) - ResultTy = StructType::create(Context); - break; - case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD - if (NextTypeID >= TypeList.size()) break; - // If we already read it, don't reprocess. - if (TypeList[NextTypeID] && - !cast<StructType>(TypeList[NextTypeID])->isOpaque()) - break; - - // Set a type. - if (TypeList[NextTypeID] == 0) - TypeList[NextTypeID] = StructType::create(Context); - - std::vector<Type*> EltTys; - for (unsigned i = 1, e = Record.size(); i != e; ++i) { - if (Type *Elt = getTypeByIDOrNull(Record[i])) - EltTys.push_back(Elt); - else - break; - } - - if (EltTys.size() != Record.size()-1) - break; // Not all elements are ready. - - cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]); - ResultTy = TypeList[NextTypeID]; - TypeList[NextTypeID] = 0; - break; - } - case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or - // [pointee type, address space] - if (Record.size() < 1) - return Error("Invalid POINTER type record"); - unsigned AddressSpace = 0; - if (Record.size() == 2) - AddressSpace = Record[1]; - if ((ResultTy = getTypeByIDOrNull(Record[0]))) - ResultTy = PointerType::get(ResultTy, AddressSpace); - break; - } - case bitc::TYPE_CODE_FUNCTION_OLD: { - // FIXME: attrid is dead, remove it in LLVM 3.0 - // FUNCTION: [vararg, attrid, retty, paramty x N] - if (Record.size() < 3) - return Error("Invalid FUNCTION type record"); - std::vector<Type*> ArgTys; - for (unsigned i = 3, e = Record.size(); i != e; ++i) { - if (Type *Elt = getTypeByIDOrNull(Record[i])) - ArgTys.push_back(Elt); - else - break; - } - if (ArgTys.size()+3 != Record.size()) - break; // Something was null. - if ((ResultTy = getTypeByIDOrNull(Record[2]))) - ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); - break; - } - case bitc::TYPE_CODE_FUNCTION: { - // FUNCTION: [vararg, retty, paramty x N] - if (Record.size() < 2) - return Error("Invalid FUNCTION type record"); - std::vector<Type*> ArgTys; - for (unsigned i = 2, e = Record.size(); i != e; ++i) { - if (Type *Elt = getTypeByIDOrNull(Record[i])) - ArgTys.push_back(Elt); - else - break; - } - if (ArgTys.size()+2 != Record.size()) - break; // Something was null. 
- if ((ResultTy = getTypeByIDOrNull(Record[1]))) - ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); - break; - } - case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] - if (Record.size() < 2) - return Error("Invalid ARRAY type record"); - if ((ResultTy = getTypeByIDOrNull(Record[1]))) - ResultTy = ArrayType::get(ResultTy, Record[0]); - break; - case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] - if (Record.size() < 2) - return Error("Invalid VECTOR type record"); - if ((ResultTy = getTypeByIDOrNull(Record[1]))) - ResultTy = VectorType::get(ResultTy, Record[0]); - break; - } - - if (NextTypeID >= TypeList.size()) - return Error("invalid TYPE table"); - - if (ResultTy && TypeList[NextTypeID] == 0) { - ++NumTypesRead; - ReadAnyTypes = true; - - TypeList[NextTypeID] = ResultTy; - } - - ++NextTypeID; - } -} - - -bool BitcodeReader::ParseOldTypeSymbolTable() { - if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD)) - return Error("Malformed block record"); - - SmallVector<uint64_t, 64> Record; - - // Read all the records for this type table. - std::string TypeName; - while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of type symbol table block"); - return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } - - // Read a record. - Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { - default: // Default behavior: unknown type. - break; - case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N] - if (ConvertToString(Record, 1, TypeName)) - return Error("Invalid TST_ENTRY record"); - unsigned TypeID = Record[0]; - if (TypeID >= TypeList.size()) - return Error("Invalid Type ID in TST_ENTRY record"); - - // Only apply the type name to a struct type with no name. - if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID])) - if (!STy->isLiteral() && !STy->hasName()) - STy->setName(TypeName); - TypeName.clear(); - break; - } - } -} - bool BitcodeReader::ParseValueSymbolTable() { if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return Error("Malformed block record"); @@ -1553,6 +1287,50 @@ bool BitcodeReader::ParseConstants() { return false; } +bool BitcodeReader::ParseUseLists() { + if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector<uint64_t, 64> Record; + + // Read all the records. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of use-list table block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a use list record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. + break; + case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. 
+ unsigned RecordLength = Record.size(); + if (RecordLength < 1) + return Error ("Invalid UseList reader!"); + UseListRecords.push_back(Record); + break; + } + } + } +} + /// RememberAndSkipFunctionBody - When we see the block for a function body, /// remember where it is and then skip it. This lets us lazily deserialize the /// functions. @@ -1636,14 +1414,6 @@ bool BitcodeReader::ParseModule() { if (ParseTypeTable()) return true; break; - case bitc::TYPE_BLOCK_ID_OLD: - if (ParseOldTypeTable()) - return true; - break; - case bitc::TYPE_SYMTAB_BLOCK_ID_OLD: - if (ParseOldTypeSymbolTable()) - return true; - break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (ParseValueSymbolTable()) return true; @@ -1667,6 +1437,10 @@ bool BitcodeReader::ParseModule() { if (RememberAndSkipFunctionBody()) return true; break; + case bitc::USELIST_BLOCK_ID: + if (ParseUseLists()) + return true; + break; } continue; } @@ -2975,12 +2749,6 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { } std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics); - // Upgrade to new EH scheme. N.B. This will go away in 3.1. - UpgradeExceptionHandling(M); - - // Check debug info intrinsics. - CheckDebugInfoIntrinsics(TheModule); - return false; } @@ -3026,6 +2794,9 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, return 0; } + // TODO: Restore the use-lists to the in-memory state when the bitcode was + // written. We must defer until the Module has been fully materialized. + return M; } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 6e6118c..978b15b 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -135,6 +135,7 @@ class BitcodeReader : public GVMaterializer { BitcodeReaderValueList ValueList; BitcodeReaderMDValueList MDValueList; SmallVector<Instruction *, 64> InstructionList; + SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords; std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits; std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits; @@ -211,7 +212,6 @@ public: bool ParseTriple(std::string &Triple); private: Type *getTypeByID(unsigned ID); - Type *getTypeByIDOrNull(unsigned ID); Value *getFnValueByID(unsigned ID, Type *Ty) { if (Ty && Ty->isMetadataTy()) return MDValueList.getValueFwdRef(ID); @@ -259,10 +259,8 @@ private: bool ParseModule(); bool ParseAttributeBlock(); bool ParseTypeTable(); - bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1 bool ParseTypeTableBody(); - bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1 bool ParseValueSymbolTable(); bool ParseConstants(); bool RememberAndSkipFunctionBody(); @@ -271,6 +269,7 @@ private: bool ParseMetadata(); bool ParseMetadataAttachment(); bool ParseModuleTriple(std::string &Triple); + bool ParseUseLists(); }; } // End llvm namespace diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt index 37bebc4..693d431 100644 --- a/lib/Bitcode/Reader/CMakeLists.txt +++ b/lib/Bitcode/Reader/CMakeLists.txt @@ -2,8 +2,3 @@ add_llvm_library(LLVMBitReader BitReader.cpp BitcodeReader.cpp ) - -add_llvm_library_dependencies(LLVMBitReader - LLVMCore - LLVMSupport - ) diff --git a/lib/Bitcode/Reader/LLVMBuild.txt b/lib/Bitcode/Reader/LLVMBuild.txt index 948b335..c85a87b 100644 --- a/lib/Bitcode/Reader/LLVMBuild.txt +++ b/lib/Bitcode/Reader/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = BitReader parent = Bitcode required_libraries = Core Support - diff --git 
a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index e758f94..d980163 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -23,6 +23,7 @@ #include "llvm/Operator.h" #include "llvm/ValueSymbolTable.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -31,6 +32,12 @@ #include <map> using namespace llvm; +static cl::opt<bool> +EnablePreserveUseListOrdering("enable-bc-uselist-preserve", + cl::desc("Turn on experimental support for " + "use-list order preservation."), + cl::init(false), cl::Hidden); + /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { @@ -194,11 +201,12 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); SmallVector<uint64_t, 64> TypeVals; + uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1); + // Abbrev for TYPE_CODE_POINTER. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0 unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv); @@ -207,8 +215,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_STRUCT_ANON. @@ -216,8 +224,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_STRUCT_NAME. @@ -232,16 +240,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); + unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv); // Emit an entry count so the reader can reserve space. 
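The hunks above hoist the repeated Log2_32_Ceil(VE.getTypes().size()+1) computation into NumBits: it is simply the width of a fixed abbreviation field wide enough to hold any type ID. A standalone check of that arithmetic, for illustration only (Log2_32_Ceil is the real MathExtras helper; typeIDFieldWidth is a hypothetical name):

#include "llvm/Support/MathExtras.h"
#include <cassert>

// With N enumerated types the valid IDs are 0..N-1, so Log2_32_Ceil(N+1) bits
// always suffice: e.g. 255 types -> 8 bits, 256 types -> 9 bits.
static unsigned typeIDFieldWidth(unsigned NumTypes) {
  unsigned NumBits = llvm::Log2_32_Ceil(NumTypes + 1);
  assert((NumTypes == 0 || NumTypes - 1 < (1ull << NumBits)) &&
         "abbrev field too narrow for the largest type ID");
  return NumBits;
}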
@@ -497,8 +505,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the function proto information. for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { - // FUNCTION: [type, callingconv, isproto, paramattr, - // linkage, alignment, section, visibility, gc, unnamed_addr] + // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, + // section, visibility, gc, unnamed_addr] Vals.push_back(VE.getTypeID(F->getType())); Vals.push_back(F->getCallingConv()); Vals.push_back(F->isDeclaration()); @@ -518,6 +526,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the alias information. for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end(); AI != E; ++AI) { + // ALIAS: [alias type, aliasee val#, linkage, visibility] Vals.push_back(VE.getTypeID(AI->getType())); Vals.push_back(VE.getValueID(AI->getAliasee())); Vals.push_back(getEncodedLinkage(AI)); @@ -1571,6 +1580,102 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } +// Sort the Users based on the order in which the reader parses the bitcode +// file. +static bool bitcodereader_order(const User *lhs, const User *rhs) { + // TODO: Implement. Until then, keep the existing order: a comparator that + // always returns true is not a valid strict weak ordering for std::sort + // (it violates irreflexivity), but one that always returns false is. + return false; +} + +static void WriteUseList(const Value *V, const ValueEnumerator &VE, + BitstreamWriter &Stream) { + + // One or zero uses can't get out of order. + if (V->use_empty() || V->hasNUses(1)) + return; + + // Make a copy of the in-memory use-list for sorting. + unsigned UseListSize = std::distance(V->use_begin(), V->use_end()); + SmallVector<const User*, 8> UseList; + UseList.reserve(UseListSize); + for (Value::const_use_iterator I = V->use_begin(), E = V->use_end(); + I != E; ++I) { + const User *U = *I; + UseList.push_back(U); + } + + // Sort the copy based on the order read by the BitcodeReader. + std::sort(UseList.begin(), UseList.end(), bitcodereader_order); + + // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the + // sorted list (i.e., the expected BitcodeReader in-memory use-list). + + // TODO: Emit the USELIST_CODE_ENTRYs. +} + +static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE, + BitstreamWriter &Stream) { + VE.incorporateFunction(*F); + + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) + WriteUseList(AI, VE, Stream); + for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE; + ++BB) { + WriteUseList(BB, VE, Stream); + for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) { + WriteUseList(II, VE, Stream); + for (User::const_op_iterator OI = II->op_begin(), E = II->op_end(); + OI != E; ++OI) { + if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) || + isa<InlineAsm>(*OI)) + WriteUseList(*OI, VE, Stream); + } + } + } + VE.purgeFunction(); +} + +// Emit use-lists. +static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); + + // XXX: this modifies the module, but in a way that should never change the + // behavior of any pass or codegen in LLVM. The problem is that GVs may + // contain entries in the use_list that do not exist in the Module and are + // not stored in the .bc file. + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) + I->removeDeadConstantUsers(); + + // Write the global variables.
+ for (Module::const_global_iterator GI = M->global_begin(), + GE = M->global_end(); GI != GE; ++GI) { + WriteUseList(GI, VE, Stream); + + // Write the global variable initializers. + if (GI->hasInitializer()) + WriteUseList(GI->getInitializer(), VE, Stream); + } + + // Write the functions. + for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) { + WriteUseList(FI, VE, Stream); + if (!FI->isDeclaration()) + WriteFunctionUseList(FI, VE, Stream); + } + + // Write the aliases. + for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end(); + AI != AE; ++AI) { + WriteUseList(AI, VE, Stream); + WriteUseList(AI->getAliasee(), VE, Stream); + } + + Stream.ExitBlock(); +} /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream) { @@ -1616,6 +1721,10 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit names for globals/functions etc. WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); + // Emit use-lists. + if (EnablePreserveUseListOrdering) + WriteModuleUseLists(M, VE, Stream); + Stream.ExitBlock(); } diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt index 3cf9056..f097b09 100644 --- a/lib/Bitcode/Writer/CMakeLists.txt +++ b/lib/Bitcode/Writer/CMakeLists.txt @@ -4,8 +4,3 @@ add_llvm_library(LLVMBitWriter BitcodeWriterPass.cpp ValueEnumerator.cpp ) - -add_llvm_library_dependencies(LLVMBitWriter - LLVMCore - LLVMSupport - ) diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt index 39ff04e..7d9e1de 100644 --- a/lib/Bitcode/Writer/LLVMBuild.txt +++ b/lib/Bitcode/Writer/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = BitWriter parent = Bitcode required_libraries = Core Support - diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 9ae9905..1c4d670 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -19,6 +19,8 @@ #include "llvm/Module.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include <algorithm> using namespace llvm; @@ -107,7 +109,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) { OptimizeConstants(FirstConstant, Values.size()); } - unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { InstructionMapType::const_iterator I = InstructionMap.find(Inst); assert(I != InstructionMap.end() && "Instruction is not mapped!"); @@ -130,6 +131,43 @@ unsigned ValueEnumerator::getValueID(const Value *V) const { return I->second-1; } +void ValueEnumerator::dump() const { + print(dbgs(), ValueMap, "Default"); + dbgs() << '\n'; + print(dbgs(), MDValueMap, "MetaData"); + dbgs() << '\n'; +} + +void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, + const char *Name) const { + + OS << "Map Name: " << Name << "\n"; + OS << "Size: " << Map.size() << "\n"; + for (ValueMapType::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + + const Value *V = I->first; + if (V->hasName()) + OS << "Value: " << V->getName() << '\n'; + else + OS << "Value: [null]\n"; + V->dump(); + + OS << " Uses(" << std::distance(V->use_begin(), V->use_end()) << "):"; + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + if (UI != V->use_begin()) + OS << ","; + if ((*UI)->hasName()) + OS << " " << (*UI)->getName(); + else + OS << " [null]"; + + } + OS << "\n\n"; + } +}
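The bitcodereader_order comparator in WriteUseList above is deliberately left as a TODO. For illustration only, one plausible shape for it, assuming users can be ranked by the IDs the ValueEnumerator hands out (a real implementation would also need the function-local numbering that getInstructionID provides, so this is a sketch, not a drop-in):

namespace {
// Hypothetical comparator: lower value IDs are materialized earlier by the
// reader, so rank users by the ID the ValueEnumerator assigned them.
struct BitcodeOrderCompare {
  const llvm::ValueEnumerator &VE;
  explicit BitcodeOrderCompare(const llvm::ValueEnumerator &VE) : VE(VE) {}
  bool operator()(const llvm::User *LHS, const llvm::User *RHS) const {
    return VE.getValueID(LHS) < VE.getValueID(RHS);
  }
};
} // end anonymous namespace

// WriteUseList would then sort with:
//   std::sort(UseList.begin(), UseList.end(), BitcodeOrderCompare(VE));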
+ // Optimize constant ordering. namespace { struct CstSortPredicate { diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index b6fc920..a6ca536 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -32,6 +32,7 @@ class NamedMDNode; class AttrListPtr; class ValueSymbolTable; class MDSymbolTable; +class raw_ostream; class ValueEnumerator { public: @@ -83,6 +84,9 @@ private: public: ValueEnumerator(const Module *M); + void dump() const; + void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const; + unsigned getValueID(const Value *V) const; unsigned getTypeID(Type *T) const { diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 25842a7..6cf4571 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(State == NULL); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); @@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); @@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register uses for this instruction and update diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index fafc010..0c84be5 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -1,4 +1,4 @@ -//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===// +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -149,33 +150,40 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, /// consideration of global floating-point math flags. 
/// ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { - ISD::CondCode FPC, FOC; switch (Pred) { - case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break; - case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break; - case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break; - case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break; - case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break; - case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break; - case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break; - case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break; - case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break; - case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break; - case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break; - case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break; - case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break; - case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break; - case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; - case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; - default: - llvm_unreachable("Invalid FCmp predicate opcode!"); - FOC = FPC = ISD::SETFALSE; - break; + case FCmpInst::FCMP_FALSE: return ISD::SETFALSE; + case FCmpInst::FCMP_OEQ: return ISD::SETOEQ; + case FCmpInst::FCMP_OGT: return ISD::SETOGT; + case FCmpInst::FCMP_OGE: return ISD::SETOGE; + case FCmpInst::FCMP_OLT: return ISD::SETOLT; + case FCmpInst::FCMP_OLE: return ISD::SETOLE; + case FCmpInst::FCMP_ONE: return ISD::SETONE; + case FCmpInst::FCMP_ORD: return ISD::SETO; + case FCmpInst::FCMP_UNO: return ISD::SETUO; + case FCmpInst::FCMP_UEQ: return ISD::SETUEQ; + case FCmpInst::FCMP_UGT: return ISD::SETUGT; + case FCmpInst::FCMP_UGE: return ISD::SETUGE; + case FCmpInst::FCMP_ULT: return ISD::SETULT; + case FCmpInst::FCMP_ULE: return ISD::SETULE; + case FCmpInst::FCMP_UNE: return ISD::SETUNE; + case FCmpInst::FCMP_TRUE: return ISD::SETTRUE; + default: break; + } + llvm_unreachable("Invalid FCmp predicate opcode!"); + return ISD::SETFALSE; +} + +ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ; + case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE; + case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT; + case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE; + case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT; + case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE; + default: break; } - if (NoNaNsFPMath) - return FOC; - else - return FPC; + return CC; } /// getICmpCondCode - Return the ISD condition code corresponding to @@ -221,12 +229,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. if (!Ret && - (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; + (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt || + !isa<UnreachableInst>(Term))) return false; // If I will have a chain, make sure no other instruction that will have a // chain interposes between I and the return. 
if (I->mayHaveSideEffects() || I->mayReadFromMemory() || - !I->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(I)) for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; --BBI) { if (&*BBI == I) @@ -235,7 +244,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, if (isa<DbgInfoIntrinsic>(BBI)) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !BBI->isSafeToSpeculativelyExecute()) + !isSafeToSpeculativelyExecute(BBI)) return false; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 711b796..0c4d0d5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1308,7 +1308,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) { } // Emit the function pointers in reverse priority order. - switch (MAI->getStructorOutputOrder()) { + switch (getObjFileLowering().getStructorOutputOrder()) { case Structors::None: break; case Structors::PriorityOrder: @@ -1659,6 +1659,28 @@ static void EmitGlobalConstantVector(const ConstantVector *CV, AP.OutStreamer.EmitZeros(Padding, AddrSpace); } +static void LowerVectorConstant(const Constant *CV, unsigned AddrSpace, + AsmPrinter &AP) { + // Look through bitcasts + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) + if (CE->getOpcode() == Instruction::BitCast) + CV = CE->getOperand(0); + + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) + return EmitGlobalConstantVector(V, AddrSpace, AP); + + // If we get here, we're stuck; report the problem to the user. + // FIXME: Are there any other useful tricks for vectors? + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported vector expression in static initializer: "; + WriteAsOperand(OS, CV, /*PrintType=*/false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } +} + static void EmitGlobalConstantStruct(const ConstantStruct *CS, unsigned AddrSpace, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! @@ -1813,8 +1835,8 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return; } - if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return EmitGlobalConstantVector(V, AddrSpace, AP); + if (CV->getType()->isVectorTy()) + return LowerVectorConstant(CV, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. @@ -1987,7 +2009,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB->getAlignment()) - EmitAlignment(Log2_32(Align)); + EmitAlignment(Align); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label @@ -2082,7 +2104,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { MachineInstr &MI = *II; // If it is not a simple branch, we are in a table somewhere. 
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch()) + if (!MI.isBranch() || MI.isIndirectBranch()) return false; // If we are the operands of one of the branches, this is not diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index f6ce17d..58fe2ed 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -12,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter OcamlGCPrinter.cpp Win64Exception.cpp ) - -add_llvm_library_dependencies(LLVMAsmPrinter - LLVMAnalysis - LLVMCodeGen - LLVMCore - LLVMMC - LLVMMCParser - LLVMSupport - LLVMTarget - ) diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 237998a..8cb5156 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -791,13 +791,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DISubprogram SP(Element); ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); @@ -988,7 +988,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); @@ -1398,17 +1398,17 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. else - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, dwarf::DW_VIRTUALITY_virtual); // Objective-C properties. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index dc46a58..a3db96a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -442,6 +442,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. + addSubprogramNames(TheCU, InlinedSP, ScopeDIE); + return ScopeDIE; } @@ -1414,7 +1418,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); - if (!DisableFramePointerElim(*MF)) + if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, dwarf::DW_FORM_flag, 1); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index e0f2e85..bf7f7ee 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -184,7 +184,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, /// CallToNoUnwindFunction - Return `true' if this is a call to a function /// marked `nounwind'. Return `false' otherwise. bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { - assert(MI->getDesc().isCall() && "This should be a call instruction!"); + assert(MI->isCall() && "This should be a call instruction!"); bool MarkedNoUnwind = false; bool SawFunc = false; @@ -243,7 +243,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - if (MI->getDesc().isCall()) + if (MI->isCall()) SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); continue; } diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt index 0f2059f..20b1f7b 100644 --- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = AsmPrinter parent = Libraries required_libraries = Analysis CodeGen Core MC MCParser Support Target - diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 5dec368..89894c3 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -432,10 +432,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, for (; I != E; ++I) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall()) + if (I->isCall()) Time += 10; - else if (MCID.mayLoad() || MCID.mayStore()) + else if (I->mayLoad() || I->mayStore()) Time += 2; else ++Time; @@ -502,7 +501,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, break; } --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; ++NumTerms; } return NumTerms; @@ -550,8 +549,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // heuristics. unsigned EffectiveTailLen = CommonTailLen; if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && - !MBB1->back().getDesc().isBarrier() && - !MBB2->back().getDesc().isBarrier()) + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) ++EffectiveTailLen; // Check if the common tail is long enough to be worthwhile. 
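A mechanical change runs through the CodeGen files above and below: flag queries that used to go through the instruction descriptor now use the forwarding predicates MachineInstr gained. A minimal before/after sketch of the pattern (illustrative, not code from the patch; the helper name is invented):

#include "llvm/CodeGen/MachineInstr.h"

// Before: if (MI->getDesc().isCall() || MI->getDesc().isBarrier()) ...
// After, with MachineInstr forwarding the MCInstrDesc flags directly:
static bool endsBlockAbruptly(const llvm::MachineInstr *MI) {
  return MI->isCall() || MI->isBranch() || MI->isBarrier();
}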
@@ -927,8 +926,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(IBB, PredBB); // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. - PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + // The 1 below can occur as a result of removing blocks in + // TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -983,7 +983,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { if (!MBBI->isDebugValue()) break; } - return (MBBI->getDesc().isBranch()); + return (MBBI->isBranch()); } /// IsBetterFallthrough - Return true if it would be clearly better to @@ -1011,7 +1011,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock::iterator MBB2I = --MBB2->end(); while (MBB2I->isDebugValue()) --MBB2I; - return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); + return MBB2I->isCall() && !MBB1I->isCall(); } /// OptimizeBlock - Analyze and optimize control flow related to the specified diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index c8d4dcf..7aee3bb 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMCodeGen CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp + DFAPacketizer.cpp DwarfEHPrepare.cpp EdgeBundles.cpp ELFCodeEmitter.cpp @@ -46,6 +47,7 @@ add_llvm_library(LLVMCodeGen MachineFunctionPass.cpp MachineFunctionPrinterPass.cpp MachineInstr.cpp + MachineInstrBundle.cpp MachineLICM.cpp MachineLoopInfo.cpp MachineLoopRanges.cpp @@ -87,27 +89,18 @@ add_llvm_library(LLVMCodeGen Spiller.cpp SpillPlacement.cpp SplitKit.cpp - Splitter.cpp StackProtector.cpp StackSlotColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp + TargetFrameLoweringImpl.cpp TargetInstrInfoImpl.cpp TargetLoweringObjectFileImpl.cpp + TargetOptionsImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp ) -add_llvm_library_dependencies(LLVMCodeGen - LLVMAnalysis - LLVMCore - LLVMMC - LLVMScalarOpts - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) - add_subdirectory(SelectionDAG) add_subdirectory(AsmPrinter) diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 3112c22..48b71d9 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -45,7 +45,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRegisterCoalescerPass(Registry); initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); - initializeLoopSplitterPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeStrongPHIEliminationPass(Registry); diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 84c4d59..128143e 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -54,7 +54,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Clear "do not change" set. KeepRegs.clear(); - bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); // Determine the live-out physregs for this block. 
if (IsReturnBlock) { @@ -193,8 +193,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->getDesc().isCall() || - MI->getDesc().hasExtraSrcRegAllocReq() || + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register operands for this instruction and update @@ -572,7 +572,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp new file mode 100644 index 0000000..16276bd --- /dev/null +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -0,0 +1,98 @@ +//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This class implements a deterministic finite automaton (DFA) based +// packetizing mechanism for VLIW architectures. It provides APIs to +// determine whether there exists a legal mapping of instructions to +// functional unit assignments in a packet. The DFA is auto-generated from +// the target's Schedule.td file. +// +// A DFA consists of 3 major elements: states, inputs, and transitions. For +// the packetizing mechanism, the input is the set of instruction classes for +// a target. The state models all possible combinations of functional unit +// consumption for a given set of instructions in a packet. A transition +// models the addition of an instruction to a packet. In the DFA constructed +// by this class, if an instruction can be added to a packet, then a valid +// transition exists from the corresponding state. Invalid transitions +// indicate that the instruction cannot be added to the current packet. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCInstrItineraries.h" +using namespace llvm; + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + const unsigned *SET): + InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), + DFAStateEntryTable(SET) {} + + +// +// ReadTable - Read the DFA transition table and update CachedTable. +// +// Format of the transition tables: +// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid +// transitions +// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable +// for the ith state +// +void DFAPacketizer::ReadTable(unsigned int state) { + unsigned ThisState = DFAStateEntryTable[state]; + unsigned NextStateInTable = DFAStateEntryTable[state+1]; + // Early exit in case CachedTable already contains this + // state's transitions.
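// Illustration (not part of the patch): a tiny two-state automaton could be
// encoded in this format as follows, with state i owning rows
// [DFAStateEntryTable[i], DFAStateEntryTable[i+1]) of the input table; all
// values here are invented, not taken from any real target:
//
//   static const int DFAStateInputTable[][2] = {
//     {1, 1},  // state 0 --input class 1--> state 1
//     {2, 1},  // state 0 --input class 2--> state 1
//     {2, 0},  // state 1 --input class 2--> state 0
//   };
//   static const unsigned DFAStateEntryTable[] = {0, 2, 3};
//
// With these tables, ReadTable(0) caches the transitions (0,1)->1 and
// (0,2)->1. A target driver built on the canReserveResources() /
// reserveResources() pair defined below would then packetize greedily,
// roughly as follows (hypothetical sketch; it assumes a reset-to-initial-
// state helper such as clearResources() in the accompanying header):
//
//   for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
//        I != E; ++I) {
//     if (!DFA.canReserveResources(I)) // no legal FU assignment remains,
//       DFA.clearResources();          // so close the packet and restart
//     DFA.reserveResources(I);         // add the instruction to the packet
//   }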
+ if (CachedTable.count(UnsignPair(state, + DFAStateInputTable[ThisState][0]))) + return; + + for (unsigned i = ThisState; i < NextStateInTable; i++) + CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = + DFAStateInputTable[i][1]; +} + + +// canReserveResources - Check if the resources occupied by a MCInstrDesc +// are available in the current state. +bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + return (CachedTable.count(StateTrans) != 0); +} + + +// reserveResources - Reserve the resources occupied by a MCInstrDesc and +// change the current state to reflect that change. +void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); + unsigned FuncUnits = IS->getUnits(); + UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + ReadTable(CurrentState); + assert(CachedTable.count(StateTrans) != 0); + CurrentState = CachedTable[StateTrans]; +} + + +// canReserveResources - Check if the resources occupied by a machine +// instruction are available in the current state. +bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + return canReserveResources(&MID); +} + +// reserveResources - Reserve the resources occupied by a machine +// instruction and change the current state to reflect that change. +void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + reserveResources(&MID); +} diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 6de6c0c..ba135e1 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -102,7 +102,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { LivePhysRegs = ReservedRegs; // Also add any explicit live-out physregs for this block. - if (!MBB->empty() && MBB->back().getDesc().isReturn()) + if (!MBB->empty() && MBB->back().isReturn()) for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { unsigned Reg = *LOI; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 300f037..4ec75cd 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -454,7 +454,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { assert(!MI->isDebugValue() && "Won't process debug values"); const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, - e = MCID.isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index a67140e..b5f107d 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -62,8 +62,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { MachineInstr *MI = MBBI++; // If MI is a pseudo, expand it. 
- const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.usesCustomInsertionHook()) { + if (MI->usesCustomInsertionHook()) { Changed = true; MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index e2a14a8..3d23db0 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -207,7 +207,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { ++mi; // Only expand pseudos. - if (!MI->getDesc().isPseudo()) + if (!MI->isPseudo()) continue; // Give targets a chance to expand even standard pseudos. diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 9349797..e2c7132 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -386,7 +386,7 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { BBE = MF.end(); BBI != BBE; ++BBI) for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); MI != ME; ++MI) - if (MI->getDesc().isCall()) + if (MI->isCall()) VisitCallPoint(MI); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index d888939..bd31fdf 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, // blocks, move the end iterators up past any branch instructions. while (TIE != TIB) { --TIE; - if (!TIE->getDesc().isBranch()) + if (!TIE->isBranch()) break; } while (FIE != FIB) { --FIE; - if (!FIE->getDesc().isBranch()) + if (!FIE->isBranch()) break; } @@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (I->isDebugValue()) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isNotDuplicable()) + if (I->isNotDuplicable()) BBI.CannotBeCopied = true; bool isPredicated = TII->isPredicated(I); - bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch(); + bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); if (!isCondBr) { if (!isPredicated) { @@ -1395,9 +1394,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { - const MCInstrDesc &MCID = I->getDesc(); // Do not copy the end of the block branches. - if (IgnoreBr && MCID.isBranch()) + if (IgnoreBr && I->isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 59907d9..9bf810e 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -759,7 +759,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Find all spills and copies of VNI. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); MachineInstr *MI = UI.skipInstruction();) { - if (!MI->isCopy() && !MI->getDesc().mayStore()) + if (!MI->isCopy() && !MI->mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); if (LI->getVNInfoAt(Idx) != VNI) @@ -878,7 +878,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // Before rematerializing into a register for a single instruction, try to // fold a load into the instruction. That avoids allocating a new register. 
- if (RM.OrigMI->getDesc().canFoldAsLoad() && + if (RM.OrigMI->canFoldAsLoad() && foldMemoryOperand(MI, Ops, RM.OrigMI)) { Edit->markRematerialized(RM.ParentVNI); ++NumFoldedLoads; @@ -957,7 +957,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill); // Get rid of deleted and empty intervals. for (unsigned i = RegsToSpill.size(); i != 0; --i) { @@ -1240,7 +1240,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill); } // Finally delete the SnippetCopies. diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index 2eebb08..fee0347 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -15,9 +15,11 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmPrinter SelectionDAG + [component_0] type = Library name = CodeGen parent = Libraries required_libraries = Analysis Core MC Scalar Support Target TransformUtils - diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 03b5693..62227fd 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -41,10 +41,6 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -namespace llvm { - bool EnableFastISel; -} - static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -114,9 +110,10 @@ EnableFastISelOption("fast-isel", cl::Hidden, LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, + TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, Triple, CPU, FS) { + : TargetMachine(T, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); AsmInfo = T.createMCAsmInfo(Triple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, @@ -275,14 +272,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, return false; // success! } -static void printNoVerify(PassManagerBase &PM, const char *Banner) { - if (PrintMachineCode) +void LLVMTargetMachine::printNoVerify(PassManagerBase &PM, + const char *Banner) const { + if (Options.PrintMachineCode) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); } -static void printAndVerify(PassManagerBase &PM, - const char *Banner) { - if (PrintMachineCode) +void LLVMTargetMachine::printAndVerify(PassManagerBase &PM, + const char *Banner) const { + if (Options.PrintMachineCode) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) @@ -380,7 +378,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (EnableFastISelOption == cl::BOU_TRUE || (getOptLevel() == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) - EnableFastISel = true; + Options.EnableFastISel = true; // Ask the target for an isel. 
if (addInstSelector(PM)) diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index eb54baa7..c35302a 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -920,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, } // Don't insert anything after the first terminator, though. - return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() : - llvm::next(MachineBasicBlock::iterator(MI)); + return MI->isTerminator() ? MBB->getFirstTerminator() : + llvm::next(MachineBasicBlock::iterator(MI)); } DebugLoc UserValue::findDebugLoc() { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index edcfebe..1e58173 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -794,7 +794,7 @@ LiveIntervals::getLastSplitPoint(const LiveInterval &li, MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin(); while (I != B) { --I; - if (I->getDesc().isCall()) + if (I->isCall()) return I; } // The block contains no calls that can throw, so use the first terminator. diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 2f283b2..a470877 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -129,7 +129,7 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, } // If only cheap remats were requested, bail out early. - if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) + if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove()) return false; // Verify that all used registers are available with the same values. @@ -174,7 +174,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (MO.isDef()) { if (DefMI && DefMI != MI) return false; - if (!MI->getDesc().canFoldAsLoad()) + if (!MI->canFoldAsLoad()) return false; DefMI = MI; } else if (!MO.isUndef()) { @@ -210,7 +210,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, LiveIntervals &LIS, VirtRegMap &VRM, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + ArrayRef<unsigned> RegsBeingSpilled) { SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrink; @@ -290,6 +291,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, delegate_->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; + + // Don't create new intervals for a register being spilled. + // The new intervals would have to be spilled anyway so it's not worth it. + // Also they currently aren't spilled so creating them and not spilling + // them results in incorrect code. + bool BeingSpilled = false; + for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { + if (LI->reg == RegsBeingSpilled[i]) { + BeingSpilled = true; + break; + } + } + + if (BeingSpilled) continue; + // LI may have been separated, create new intervals. LI->RenumberValues(LIS); diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h index 9b0a671..057d9bb 100644 --- a/lib/CodeGen/LiveRangeEdit.h +++ b/lib/CodeGen/LiveRangeEdit.h @@ -191,9 +191,14 @@ public: /// eliminateDeadDefs - Try to delete machine instructions that are now dead /// (allDefsAreDead returns true). This may cause live intervals to be trimmed /// and further dead defs to be eliminated. + /// RegsBeingSpilled lists registers currently being spilled by the register + /// allocator.
These registers should not be split into new intervals + /// as currently those new intervals are not guaranteed to spill. void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, LiveIntervals&, VirtRegMap&, - const TargetInstrInfo&); + const TargetInstrInfo&, + ArrayRef<unsigned> RegsBeingSpilled + = ArrayRef<unsigned>()); /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 2ca90f9..7477d91 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -590,8 +590,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // them. The tail callee need not take the same registers as input // that it produces as output, and there are dependencies for its input // registers elsewhere. - if (!MBB->empty() && MBB->back().getDesc().isReturn() - && !MBB->back().getDesc().isCall()) { + if (!MBB->empty() && MBB->back().isReturn() + && !MBB->back().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator @@ -754,7 +754,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, const unsigned NumNew = BB->getNumber(); // All registers used by PHI nodes in SuccBB must be live through BB. - for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(), + for (MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) if (BBI->getOperand(i+1).getMBB() == BB) diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index b9d1ef7..6734916 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { // Make sure the instructions have their operands in the reginfo lists. MachineRegisterInfo &RegInfo = MF.getRegInfo(); - for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I) + for (MachineBasicBlock::instr_iterator + I = N->instr_begin(), E = N->instr_end(); I != E; ++I) I->AddRegOperandsToUseLists(RegInfo); LeakDetector::removeGarbageObject(N); @@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { /// lists. void ilist_traits<MachineInstr>:: transferNodesFromList(ilist_traits<MachineInstr> &fromList, - MachineBasicBlock::iterator first, - MachineBasicBlock::iterator last) { + ilist_iterator<MachineInstr> first, + ilist_iterator<MachineInstr> last) { assert(Parent->getParent() == fromList.Parent->getParent() && "MachineInstr parent mismatch!"); @@ -140,9 +141,10 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { } MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { - iterator I = begin(); + instr_iterator I = instr_begin(); while (I != end() && I->isPHI()) ++I; + assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!"); return I; } @@ -150,23 +152,63 @@ MachineBasicBlock::iterator MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue())) ++I; + // FIXME: This needs to change if we wish to bundle labels / dbg_values + // inside the bundle. 
+ assert(!I->isInsideBundle() && + "First non-phi / non-label instruction is inside a bundle!"); return I; } MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator I = end(); - while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue())) + while (I != begin() && ((--I)->isTerminator() || I->isDebugValue())) ; /*noop */ - while (I != end() && !I->getDesc().isTerminator()) + while (I != end() && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getFirstTerminator() const { + const_iterator I = end(); + while (I != begin() && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != end() && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { + instr_iterator I = instr_end(); + while (I != instr_begin() && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != instr_end() && !I->isTerminator()) ++I; return I; } MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { - iterator B = begin(), I = end(); + // Skip over end-of-block dbg_value instructions. + instr_iterator B = instr_begin(), I = instr_end(); while (I != B) { --I; - if (I->isDebugValue()) + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +MachineBasicBlock::const_iterator +MachineBasicBlock::getLastNonDebugInstr() const { + // Skip over end-of-block dbg_value instructions. + const_instr_iterator B = instr_begin(), I = instr_end(); + while (I != B) { + --I; + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) continue; return I; } @@ -203,8 +245,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { return; } - if (Alignment) { OS << "Alignment " << Alignment << "\n"; } - if (Indexes) OS << Indexes->getMBBStartIdx(this) << '\t'; @@ -218,6 +258,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + if (Alignment) { + OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) + << " bytes)"; + Comma = ", "; + } + OS << '\n'; const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); @@ -237,13 +283,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\n'; } - for (const_iterator I = begin(); I != end(); ++I) { + for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { if (Indexes) { if (Indexes->hasIndex(I)) OS << Indexes->getInstructionIndex(I); OS << '\t'; } OS << '\t'; + if (I->isInsideBundle()) + OS << " * "; I->print(OS, &getParent()->getTarget()); } @@ -449,8 +497,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end(); - MI != ME && MI->isPHI(); ++MI) + for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), + ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { MachineOperand &MO = MI->getOperand(i); if (MO.getMBB() == fromMBB) @@ -492,8 +540,8 @@ bool MachineBasicBlock::canFallThrough() { // Barrier is predicated and thus no longer an actual control barrier. This // is over-conservative though, because if an instruction isn't actually // predicated we could still treat it like a barrier. - return empty() || !back().getDesc().isBarrier() || - back().getDesc().isPredicable(); + return empty() || !back().isBarrier() || + back().isPredicable(); } // If there is no branch, control always falls through. @@ -552,7 +600,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Collect a list of virtual registers killed by the terminators. SmallVector<unsigned, 4> KilledRegs; if (LV) - for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { MachineInstr *MI = I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { @@ -579,7 +628,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } // Fix PHI nodes in Succ so they refer to NMBB instead of this - for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end(); + for (MachineBasicBlock::instr_iterator + i = Succ->instr_begin(),e = Succ->instr_end(); i != e && i->isPHI(); ++i) for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) if (i->getOperand(ni+1).getMBB() == this) @@ -595,7 +645,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Restore kills of virtual registers that were killed by the terminators. while (!KilledRegs.empty()) { unsigned Reg = KilledRegs.pop_back_val(); - for (iterator I = end(), E = begin(); I != E;) { + for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false)) continue; LV->getVarInfo(Reg).Kills.push_back(I); @@ -664,6 +714,41 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { return NMBB; } +MachineBasicBlock::iterator +MachineBasicBlock::erase(MachineBasicBlock::iterator I) { + if (I->isBundle()) { + MachineBasicBlock::iterator E = llvm::next(I); + return Insts.erase(I.getInstrIterator(), E.getInstrIterator()); + } + + return Insts.erase(I.getInstrIterator()); +} + +MachineInstr *MachineBasicBlock::remove(MachineInstr *I) { + if (I->isBundle()) { + MachineBasicBlock::instr_iterator MII = I; ++MII; + while (MII != end() && MII->isInsideBundle()) { + MachineInstr *MI = &*MII++; + Insts.remove(MI); + } + } + + return Insts.remove(I); +} + +void MachineBasicBlock::splice(MachineBasicBlock::iterator where, + MachineBasicBlock *Other, + MachineBasicBlock::iterator From) { + if (From->isBundle()) { + MachineBasicBlock::iterator To = llvm::next(From); + Insts.splice(where.getInstrIterator(), Other->Insts, + From.getInstrIterator(), To.getInstrIterator()); + return; + } + + Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator()); +} + /// removeFromParent - This method unlinks 'this' from the containing function, /// and returns it, but does not delete it. 
MachineBasicBlock *MachineBasicBlock::removeFromParent() { @@ -687,10 +772,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Cannot replace self with self!"); - MachineBasicBlock::iterator I = end(); - while (I != begin()) { + MachineBasicBlock::instr_iterator I = instr_end(); + while (I != instr_begin()) { --I; - if (!I->getDesc().isTerminator()) break; + if (!I->isTerminator()) break; // Scan the operands of this machine instruction, replacing any uses of Old // with New. @@ -769,17 +854,17 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping /// any DBG_VALUE instructions. Return UnknownLoc if there is none. DebugLoc -MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { +MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { DebugLoc DL; - MachineBasicBlock::iterator E = end(); - if (MBBI != E) { - // Skip debug declarations, we don't want a DebugLoc from them. - MachineBasicBlock::iterator MBBI2 = MBBI; - while (MBBI2 != E && MBBI2->isDebugValue()) - MBBI2++; - if (MBBI2 != E) - DL = MBBI2->getDebugLoc(); - } + instr_iterator E = instr_end(); + if (MBBI == E) + return DL; + + // Skip debug declarations, we don't want a DebugLoc from them. + while (MBBI != E && MBBI->isDebugValue()) + MBBI++; + if (MBBI != E) + DL = MBBI->getDebugLoc(); return DL; } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 55d804b..638d895 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -36,10 +36,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -56,22 +53,6 @@ STATISTIC(UncondBranchTakenFreq, "Potential frequency of taking unconditional branches"); namespace { -/// \brief A structure for storing a weighted edge. -/// -/// This stores an edge and its weight, computed as the product of the -/// frequency that the starting block is entered with the probability of -/// a particular exit block. -struct WeightedEdge { - BlockFrequency EdgeFrequency; - MachineBasicBlock *From, *To; - - bool operator<(const WeightedEdge &RHS) const { - return EdgeFrequency < RHS.EdgeFrequency; - } -}; -} - -namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. 
typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType; @@ -222,6 +203,9 @@ class MachineBlockPlacement : public MachineFunctionPass { void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); void buildCFGChains(MachineFunction &F); void AlignLoops(MachineFunction &F); @@ -546,12 +530,134 @@ void MachineBlockPlacement::buildChain( markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *llvm::prior(Chain.end()); - }; + } DEBUG(dbgs() << "Finished forming chain for header block " << getBlockNum(*Chain.begin()) << "\n"); } +/// \brief Find the best loop top block for layout. +/// +/// This routine implements the logic to analyze the loop looking for the best +/// block to layout at the top of the loop. Typically this is done to maximize +/// fallthrough opportunities. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + BlockFrequency BestExitEdgeFreq; + MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *LoopingBB = 0; + // If there are exits to outer loops, loop rotation can severely limit + // fallthrough opportunities unless it selects such an exit. Keep a set of + // blocks where rotating to exit with that block will reach an outer loop. + SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; + + DEBUG(dbgs() << "Finding best loop exit for: " + << getBlockName(L.getHeader()) << "\n"); + for (MachineLoop::block_iterator I = L.block_begin(), + E = L.block_end(); + I != E; ++I) { + BlockChain &Chain = *BlockToChain[*I]; + // Ensure that this block is at the end of a chain; otherwise it could be + // mid-way through an inner loop or a successor of an analyzable branch. + if (*I != *llvm::prior(Chain.end())) + continue; + + // Now walk the successors. We need to establish whether this has a viable + // exiting successor and whether it has a viable non-exiting successor. + // We store the old exiting state and restore it if a viable looping + // successor isn't found. + MachineBasicBlock *OldExitingBB = ExitingBB; + BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; + // We also compute and store the best looping successor for use in layout. + MachineBasicBlock *BestLoopSucc = 0; + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we use the internal weights. This is only valid + // because it is purely a ranking function, we don't care about anything + // but the relative values. + uint32_t BestLoopSuccWeight = 0; + // FIXME: We also manually compute the probabilities to avoid quadratic + // behavior. + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); + for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), + SE = (*I)->succ_end(); + SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + if (*SI == *I) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Don't split chains, either this chain or the successor's chain. + if (&Chain == &SuccChain || *SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ?
"looping: " + : "exiting: ") + << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (chain conflict)\n"); + continue; + } + + uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI); + if (LoopBlockSet.count(*SI)) { + DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << SuccWeight << ")\n"); + if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight) + continue; + + BestLoopSucc = *SI; + BestLoopSuccWeight = SuccWeight; + continue; + } + + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " + << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n"); + // Note that we slightly bias this toward an existing layout successor to + // retain incoming order in the absence of better information. + // FIXME: Should we bias this more strongly? It's pretty weak. + if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq || + ((*I)->isLayoutSuccessor(*SI) && + !(ExitEdgeFreq < BestExitEdgeFreq))) { + BestExitEdgeFreq = ExitEdgeFreq; + ExitingBB = *I; + } + + if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(*I); + } + + // Restore the old exiting state, no viable looping successor was found. + if (!BestLoopSucc) { + ExitingBB = OldExitingBB; + BestExitEdgeFreq = OldBestExitEdgeFreq; + continue; + } + + // If this was best exiting block thus far, also record the looping block. + if (ExitingBB == *I) + LoopingBB = BestLoopSucc; + } + // Without a candidate exitting block or with only a single block in the + // loop, just use the loop header to layout the loop. + if (!ExitingBB || L.getNumBlocks() == 1) + return L.getHeader(); + + // Also, if we have exit blocks which lead to outer loops but didn't select + // one of them as the exiting block we are rotating toward, disable loop + // rotation altogether. + if (!BlocksExitingToOuterLoop.empty() && + !BlocksExitingToOuterLoop.count(ExitingBB)) + return L.getHeader(); + + assert(LoopingBB && "All successors of a loop block are exit blocks!"); + DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n"); + return LoopingBB; +} + /// \brief Forms basic block chains from the natural loop structures. /// /// These chains are designed to preserve the existing *structure* of the code @@ -567,17 +673,21 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); - BlockChain &LoopChain = *BlockToChain[L.getHeader()]; + + MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet); + BlockChain &LoopChain = *BlockToChain[LayoutTop]; // FIXME: This is a really lame way of walking the chains in the loop: we // walk the blocks, and use a set to prevent visiting a particular chain // twice. 
SmallPtrSet<BlockChain *, 4> UpdatedPreds; + assert(LoopChain.LoopPredecessors == 0); + UpdatedPreds.insert(&LoopChain); for (MachineLoop::block_iterator BI = L.block_begin(), BE = L.block_end(); BI != BE; ++BI) { BlockChain &Chain = *BlockToChain[*BI]; - if (!UpdatedPreds.insert(&Chain) || BI == L.block_begin()) + if (!UpdatedPreds.insert(&Chain)) continue; assert(Chain.LoopPredecessors == 0); @@ -597,7 +707,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BlockWorkList.push_back(*Chain.begin()); } - buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 7eda8c1..8c02cd7 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -260,12 +260,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { return false; // Ignore stuff that we obviously can't move. - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() || + if (MI->mayStore() || MI->isCall() || MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; - if (MCID.mayLoad()) { + if (MI->mayLoad()) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. @@ -287,7 +286,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. - if (MI->getDesc().isAsCheapAsAMove()) { + if (MI->isAsCheapAsAMove()) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -376,7 +375,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Commute commutable instructions. 
bool Commuted = false; - if (!FoundCSE && MI->getDesc().isCommutable()) { + if (!FoundCSE && MI->isCommutable()) { MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI) { Commuted = true; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b0ef9d4..ec5a1cd 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -178,6 +178,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsKill = isKill; IsDead = isDead; IsUndef = isUndef; + IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; SubReg = 0; @@ -240,7 +241,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || - isEarlyClobber()) { + isInternalRead() || isEarlyClobber()) { OS << '<'; bool NeedComma = false; if (isDef()) { @@ -256,14 +257,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { NeedComma = true; } - if (isKill() || isDead() || isUndef()) { + if (isKill() || isDead() || isUndef() || isInternalRead()) { if (NeedComma) OS << ','; - if (isKill()) OS << "kill"; - if (isDead()) OS << "dead"; + NeedComma = false; + if (isKill()) { + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + OS << "dead"; + NeedComma = true; + } if (isUndef()) { - if (isKill() || isDead()) - OS << ','; + if (NeedComma) OS << ','; OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; } } OS << '>'; @@ -735,6 +748,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF, MemRefsEnd = NewMemRefsEnd; } +bool +MachineInstr::hasProperty(unsigned MCFlag, QueryType Type) const { + if (Type == IgnoreBundle || !isBundle()) + return getDesc().getFlags() & (1 << MCFlag); + + const MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::const_instr_iterator MII = *this; ++MII; + while (MII != MBB->end() && MII->isInsideBundle()) { + if (MII->getDesc().getFlags() & (1 << MCFlag)) { + if (Type == AnyInBundle) + return true; + } else { + if (Type == AllInBundle) + return false; + } + ++MII; + } + + return Type == AllInBundle; +} + bool MachineInstr::isIdenticalTo(const MachineInstr *Other, MICheckType Check) const { // If opcodes or number of operands are not the same then the two @@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, Other->getNumOperands() != getNumOperands()) return false; + if (isBundle()) { + // Both instructions are bundles, compare MIs inside the bundle. + MachineBasicBlock::const_instr_iterator I1 = *this; + MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I2 = *Other; + MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); + while (++I1 != E1 && I1->isInsideBundle()) { + ++I2; + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check)) + return false; + } + } + // Check operands to make sure they match. for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, /// block, and returns it, but does not delete it. MachineInstr *MachineInstr::removeFromParent() { assert(getParent() && "Not embedded in a basic block!"); + + // If it's a bundle then remove the MIs inside the bundle as well. 
+ if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->remove(MI); + } + } getParent()->remove(this); return this; } @@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() { /// block, and deletes it. void MachineInstr::eraseFromParent() { assert(getParent() && "Not embedded in a basic block!"); + // If it's a bundle then remove the MIs inside the bundle as well. + if (isBundle()) { + MachineBasicBlock *MBB = getParent(); + MachineBasicBlock::instr_iterator MII = *this; ++MII; + MachineBasicBlock::instr_iterator E = MBB->instr_end(); + while (MII != E && MII->isInsideBundle()) { + MachineInstr *MI = &*MII; + ++MII; + MBB->erase(MI); + } + } getParent()->erase(this); } @@ -887,6 +957,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } +/// getBundleSize - Return the number of instructions inside the MI bundle. +unsigned MachineInstr::getBundleSize() const { + assert(isBundle() && "Expecting a bundle"); + + MachineBasicBlock::const_instr_iterator I = *this; + unsigned Size = 0; + while ((++I)->isInsideBundle()) { + ++Size; + } + assert(Size > 1 && "Malformed bundle"); + + return Size; +} + /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. @@ -1118,6 +1202,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { /// copyPredicates - Copies predicate operand(s) from MI. void MachineInstr::copyPredicates(const MachineInstr *MI) { + assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isPredicable()) return; @@ -1159,13 +1245,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA, bool &SawStore) const { // Ignore stuff that we obviously can't move. - if (MCID->mayStore() || MCID->isCall()) { + if (mayStore() || isCall()) { SawStore = true; return false; } if (isLabel() || isDebugValue() || - MCID->isTerminator() || hasUnmodeledSideEffects()) + isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. If so, we have to guarantee that the // loaded value doesn't change between the load and its intended // destination. The check for isInvariantLoad gives the target the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (MCID->mayLoad() && !isInvariantLoad(AA)) + if (mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); @@ -1213,9 +1299,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, /// have no volatile memory references. bool MachineInstr::hasVolatileMemoryRef() const { // An instruction known never to access memory won't have a volatile access. - if (!MCID->mayStore() && - !MCID->mayLoad() && - !MCID->isCall() && + if (!mayStore() && + !mayLoad() && + !isCall() && !hasUnmodeledSideEffects()) return false; @@ -1239,7 +1325,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { /// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If the instruction doesn't load at all, it isn't an invariant load. - if (!MCID->mayLoad()) + if (!mayLoad()) return false; // If the instruction has lost its memoperands, conservatively assume that @@ -1292,7 +1378,7 @@ unsigned MachineInstr::isConstantValuePHI() const { } bool MachineInstr::hasUnmodeledSideEffects() const { - if (getDesc().hasUnmodeledSideEffects()) + if (hasProperty(MCID::UnmodeledSideEffects)) return true; if (isInlineAsm()) { unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); @@ -1420,7 +1506,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // call instructions much less noisy on targets where calls clobber lots // of registers. Don't rely on MO.isDead() because we may be called before // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MF && getDesc().isCall() && + if (MF && isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp new file mode 100644 index 0000000..b766d08 --- /dev/null +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -0,0 +1,180 @@ +//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +namespace { + class UnpackMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + UnpackMachineBundles() : MachineFunctionPass(ID) { + initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + }; +} // end anonymous namespace + +char UnpackMachineBundles::ID = 0; +INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundle", + "Unpack machine instruction bundles", false, false) + +FunctionPass *llvm::createUnpackMachineBundlesPass() { + return new UnpackMachineBundles(); +} + +bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(), + MIE = MBB->instr_end(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // Remove BUNDLE instruction and the InsideBundle flags from bundled + // instructions. 
+ if (MI->isBundle()) { + while (++MII != MIE && MII->isInsideBundle()) { + MII->setIsInsideBundle(false); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + } + MI->eraseFromParent(); + + Changed = true; + continue; + } + + ++MII; + } + } + + return Changed; +} + +/// FinalizeBundle - Finalize a machine instruction bundle which includes +/// a sequence of instructions starting from FirstMI to LastMI (inclusive). +/// This routine adds a BUNDLE instruction to represent the bundle, it adds +/// IsInternalRead markers to MachineOperands which are defined inside the +/// bundle, and it copies externally visible defs and uses to the BUNDLE +/// instruction. +void llvm::FinalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI) { + const TargetMachine &TM = MBB.getParent()->getTarget(); + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), + TII->get(TargetOpcode::BUNDLE)); + + SmallVector<unsigned, 8> LocalDefs; + SmallSet<unsigned, 8> LocalDefSet; + SmallSet<unsigned, 8> DeadDefSet; + SmallSet<unsigned, 8> KilledDefSet; + SmallVector<unsigned, 8> ExternUses; + SmallSet<unsigned, 8> ExternUseSet; + SmallSet<unsigned, 8> KilledUseSet; + SmallSet<unsigned, 8> UndefUseSet; + SmallVector<MachineOperand*, 4> Defs; + do { + for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = FirstMI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef()) { + Defs.push_back(&MO); + continue; + } + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { + MO.setIsInternalRead(); + if (MO.isKill()) + // Internal def is now killed. + KilledDefSet.insert(Reg); + } else { + if (ExternUseSet.insert(Reg)) { + ExternUses.push_back(Reg); + if (MO.isUndef()) + UndefUseSet.insert(Reg); + } + if (MO.isKill()) + // External def is now killed. + KilledUseSet.insert(Reg); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + MachineOperand &MO = *Defs[i]; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (LocalDefSet.insert(Reg)) { + LocalDefs.push_back(Reg); + if (MO.isDead()) { + DeadDefSet.insert(Reg); + } + } else { + // Re-defined inside the bundle, it's no longer killed. + KilledDefSet.erase(Reg); + if (!MO.isDead()) + // Previously defined but dead. + DeadDefSet.erase(Reg); + } + + if (!MO.isDead()) { + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (LocalDefSet.insert(SubReg)) + LocalDefs.push_back(SubReg); + } + } + } + + FirstMI->setIsInsideBundle(); + Defs.clear(); + } while (FirstMI++ != LastMI); + + SmallSet<unsigned, 8> Added; + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + if (Added.insert(Reg)) { + // If it's not live beyond end of the bundle, mark it dead. 
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); + MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | + getImplRegState(true)); + } + } + + for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { + unsigned Reg = ExternUses[i]; + bool isKill = KilledUseSet.count(Reg); + bool isUndef = UndefUseSet.count(Reg); + MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | + getImplRegState(true)); + } +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index e5e8c51..764429d 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -765,7 +765,7 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { /// isLoadFromGOTOrConstantPool - Return true if this machine instruction /// loads from global offset table or constant pool. static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { - assert (MI.getDesc().mayLoad() && "Expected MI that loads!"); + assert (MI.mayLoad() && "Expected MI that loads!"); for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I) { if (const Value *V = (*I)->getValue()) { @@ -792,7 +792,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // from constant memory are not safe to speculate all the time, for example // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.getDesc().mayLoad() && !isLoadFromGOTOrConstantPool(I) && + if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && !IsGuaranteedToExecute(I.getParent())) return false; @@ -921,7 +921,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, /// IsCheapInstruction - Return true if the instruction is marked "cheap" or /// the operand latency between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { - if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike()) + if (MI.isAsCheapAsAMove() || MI.isCopyLike()) return true; if (!InstrItins || InstrItins->isEmpty()) return false; @@ -1105,7 +1105,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. - if (MI->getDesc().canFoldAsLoad()) + if (MI->canFoldAsLoad()) return 0; // If not, we may be able to unfold a load and hoist that. @@ -1141,8 +1141,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { assert(NewMIs.size() == 2 && "Unfolded a load into multiple instructions!"); MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MI, NewMIs[0]); - MBB->insert(MI, NewMIs[1]); + MachineBasicBlock::iterator Pos = MI; + MBB->insert(Pos, NewMIs[0]); + MBB->insert(Pos, NewMIs[1]); // If unfolding produced a load that wasn't loop-invariant or profitable to // hoist, discard the new instructions and bail. if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 84d6df2..8cb6112 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, if (BB->empty()) return 0; - MachineBasicBlock::iterator I = BB->front(); + MachineBasicBlock::iterator I = BB->begin(); if (!I->isPHI()) return 0; @@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { return DupPHI; // Otherwise, we do need a PHI: insert one now. - MachineBasicBlock::iterator Loc = BB->empty() ? 
BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, VRC, MRI, TII); @@ -311,7 +311,7 @@ public: /// Add it into the specified block and return the register. static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, MachineSSAUpdater *Updater) { - MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, Updater->VRC, Updater->MRI, Updater->TII); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 29cfb49..e47360d 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -90,6 +90,12 @@ namespace { bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; + MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB, + bool &BreakPHIEdge); + bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo); + bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; @@ -147,14 +153,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); + // Ignore debug uses because debug info doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) return true; - // Ignoring debug uses is necessary so debug info doesn't affect the code. - // This may leave a referencing dbg_value in the original block, before - // the definition of the vreg. Dwarf generator handles this although the - // user might not get the right info at runtime. - // BreakPHIEdge is true if all the uses are in the successor MBB being sunken // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. @@ -291,7 +293,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, if (!CEBCandidates.insert(std::make_pair(From, To))) return true; - if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove()) + if (!MI->isCopy() && !MI->isAsCheapAsAMove()) return true; // MI is cheap, we probably don't want to break the critical edge for it. @@ -401,35 +403,76 @@ static void collectDebugValues(MachineInstr *MI, } } -/// SinkInstruction - Determine whether it is safe to sink the specified machine -/// instruction out of its current block into a successor. -bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { - // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to - // be close to the source to make it easier to coalesce. - if (AvoidsSinking(MI, MRI)) +/// isPostDominatedBy - Return true if A is post dominated by B. +static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) { + + // FIXME - Use real post dominator. + if (A->succ_size() != 2) + return false; + MachineBasicBlock::succ_iterator I = A->succ_begin(); + if (B == *I) + ++I; + MachineBasicBlock *OtherSuccBlock = *I; + if (OtherSuccBlock->succ_size() != 1 || + *(OtherSuccBlock->succ_begin()) != B) return false; - // Check if it's safe to move the instruction. - if (!MI->isSafeToMove(TII, AA, SawStore)) + return true; +} + +/// isProfitableToSinkTo - Return true if it is profitable to sink MI. 
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo) { + assert (MI && "Invalid MachineInstr!"); + assert (SuccToSinkTo && "Invalid SinkTo Candidate BB"); + + if (MBB == SuccToSinkTo) return false; - // Check if it's safe to move the instruction. - if (!MI->isSafeToMove(TII, AA, SawStore)) + return false; - // FIXME: This should include support for sinking instructions within the - // block they are currently in to shorten the live ranges. We often get - // instructions sunk into the top of a large block, but it would be better to - // also sink them down before their first use in the block. This xform has to - // be careful not to *increase* register pressure though, e.g. sinking - // "x = y + z" down if it kills y and z would increase the live ranges of y - // and z and only shrink the live range of x. + // It is profitable if SuccToSinkTo does not post-dominate the current block. + if (!isPostDominatedBy(MBB, SuccToSinkTo)) + return true; + + // Check if the only use in the post-dominated block is a PHI instruction. + bool NonPHIUse = false; + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); + I != E; ++I) { + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseBlock == SuccToSinkTo && !UseInst->isPHI()) + NonPHIUse = true; + } + if (!NonPHIUse) + return true; + + // If SuccToSinkTo post-dominates the current block, it may still be + // profitable if MI can be profitably sunk further into another block in the + // next round. + bool BreakPHIEdge = false; + // FIXME - If finding the successor is expensive at compile time, cache the results. + if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) + return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); + + // If SuccToSinkTo is the final destination and post-dominates the current + // block, then it is not profitable to sink MI into it. + return false; +} + +/// FindSuccToSinkTo - Find a successor to sink this instruction to. +MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, + MachineBasicBlock *MBB, + bool &BreakPHIEdge) { + + assert (MI && "Invalid MachineInstr!"); + assert (MBB && "Invalid MachineBasicBlock!"); // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. - MachineBasicBlock *ParentBlock = MI->getParent(); // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = 0; - - bool BreakPHIEdge = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -443,23 +486,23 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. if (!MRI->def_empty(Reg)) - return false; + return NULL; if (AllocatableSet.test(Reg)) - return false; + return NULL; // Check for a def among the register's aliases too. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (!MRI->def_empty(AliasReg)) - return false; + return NULL; if (AllocatableSet.test(AliasReg)) - return false; + return NULL; } } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. - return false; + return NULL; } } else { // Virtual register uses are always safe to sink.
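The isProfitableToSinkTo hook above keys off a deliberately narrow notion of post-dominance: as its FIXME admits, isPostDominatedBy only recognizes a triangle, a two-successor block whose side block falls straight through to the candidate. A minimal standalone sketch of that same shape test, using a toy Block type rather than LLVM's MachineBasicBlock (the type and function names here are illustrative, not part of the patch):

  #include <vector>

  struct Block { std::vector<Block *> Succs; };

  // Mirrors the patch's isPostDominatedBy: A must have exactly two
  // successors, and the successor that is not B must fall straight
  // through to B. Every other CFG shape is conservatively rejected.
  static bool isPostDominatedByTriangle(const Block *A, const Block *B) {
    if (A->Succs.size() != 2)
      return false;
    const Block *Other = (A->Succs[0] == B) ? A->Succs[1] : A->Succs[0];
    return Other->Succs.size() == 1 && Other->Succs[0] == B;
  }

A real post-dominator tree query would subsume this special case, which is what the FIXME asks for.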
@@ -467,7 +510,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) - return false; + return NULL; // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. However, there are cases where @@ -488,48 +531,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) - return false; + return NULL; continue; } // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), - E = ParentBlock->succ_end(); SI != E; ++SI) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; - if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, + if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge, LocalUse)) { - SuccToSinkTo = *SI; + SuccToSinkTo = SuccBlock; break; } if (LocalUse) // Def is used locally, it's never safe to move this def. - return false; + return NULL; } // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) - return false; + return NULL; + else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return NULL; } } - // If there are no outputs, it must have side-effects. - if (SuccToSinkTo == 0) - return false; + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MBB == SuccToSinkTo) + return NULL; // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. - if (SuccToSinkTo->isLandingPad()) + if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + return NULL; + + return SuccToSinkTo; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to + // be close to the source to make it easier to coalesce. + if (AvoidsSinking(MI, MRI)) return false; - // It is not possible to sink an instruction into its own block. This can - // happen with loops. - if (MI->getParent() == SuccToSinkTo) + // Check if it's safe to move the instruction. + if (!MI->isSafeToMove(TII, AA, SawStore)) return false; + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + bool BreakPHIEdge = false; + MachineBasicBlock *ParentBlock = MI->getParent(); + MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); + + // If there are no outputs, it must have side-effects. 
+ if (SuccToSinkTo == 0) + return false; + + + // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index f231e3c..0a2c2f8 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -279,13 +279,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { visitMachineBasicBlockBefore(MFI); - for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), - MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { report("Bad instruction parent pointer", MFI); *OS << "Instruction: " << *MBBI; continue; } + // Skip BUNDLE instructions for now. FIXME: We should add code to verify + // the BUNDLEs specifically. + if (MBBI->isBundle()) + continue; visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); @@ -435,7 +439,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().getDesc().isBarrier() && + if (!MBB->empty() && MBB->back().isBarrier() && !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); @@ -456,10 +460,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -479,10 +483,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (MBB->back().getDesc().isBarrier()) { + } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -499,10 +503,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!MBB->back().getDesc().isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but 
the branch " "isn't a terminator instruction!", MBB); } @@ -555,9 +559,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { // Check the MachineMemOperands for basic consistency. for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I) { - if ((*I)->isLoad() && !MCID.mayLoad()) + if ((*I)->isLoad() && !MI->mayLoad()) report("Missing mayLoad flag", MI); - if ((*I)->isStore() && !MCID.mayStore()) + if ((*I)->isStore() && !MI->mayStore()) report("Missing mayStore flag", MI); } @@ -575,7 +579,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } // Ensure non-terminators don't follow terminators. - if (MCID.isTerminator()) { + if (MI->isTerminator()) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -606,7 +610,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. if (MO->isReg() && - !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) { + !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { if (MO->isDef() && !MCOI.isOptionalDef()) report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) @@ -614,7 +618,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } else { // ARM adds %reg0 operands to indicate predicates. We'll allow that. - if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg()) + if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) report("Extra explicit operand on non-variadic instruction", MO, MONum); } @@ -800,11 +804,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MCID.mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { + if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (MCID.mayStore() && !LI.liveAt(Idx.getRegSlot())) { + if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 6994aa5..0e52496 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -410,7 +410,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, return false; // Quick exit for basic blocks without PHIs. 
bool Changed = false; - for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); + for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->isPHI(); ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index bbc7ce2..2a5652a 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -292,7 +292,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, assert(Def && Src && "Malformed bitcast instruction!"); MachineInstr *DefMI = MRI->getVRegDef(Src); - if (!DefMI || !DefMI->getDesc().isBitcast()) + if (!DefMI || !DefMI->isBitcast()) return false; unsigned SrcSrc = 0; @@ -353,7 +353,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isMoveImmediate()) + if (!MI->isMoveImmediate()) return false; if (MCID.getNumDefs() != 1) return false; @@ -428,9 +428,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - - if (MCID.isBitcast()) { + if (MI->isBitcast()) { if (OptimizeBitcastInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); @@ -438,7 +436,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MII = First ? I->begin() : llvm::next(PMII); continue; } - } else if (MCID.isCompare()) { + } else if (MI->isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. LocalMIs.erase(MI); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 7205ed6..fa832c8 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -212,7 +212,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { RegClassInfo.runOnMachineFunction(Fn); // Check for explicit enable/disable of post-ra scheduling. - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE; + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + TargetSubtargetInfo::ANTIDEP_NONE; SmallVector<TargetRegisterClass*, 4> CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) @@ -271,6 +272,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } I = MI; --Count; + if (MI->isBundle()) + Count -= MI->getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); assert((MBB->begin() == Current || CurrentCount != 0) && @@ -364,7 +367,7 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { KillIndices[i] = ~0u; // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().getDesc().isReturn()) { + if (!BB->empty() && BB->back().isReturn()) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 32c9325..b4fd1cb 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -332,7 +332,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the return // sequence. 
MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; bool AtStart = I == MBB->begin(); @@ -426,11 +426,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Skip over all terminator instructions, which are part of the // return sequence. - if (! I->getDesc().isTerminator()) { + if (! I->isTerminator()) { ++I; } else { MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != MBB->begin() && (--I2)->isTerminator()) I = I2; } } @@ -698,7 +698,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // Add epilogue to restore the callee-save registers in each exiting block for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) + if (!I->empty() && I->back().isReturn()) TFI.emitEpilogue(Fn, *I); } @@ -706,7 +706,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (EnableSegmentedStacks) + if (Fn.getTarget().Options.EnableSegmentedStacks) TFI.adjustForSegmentedStacks(Fn); } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 4664a3c..c2656c5 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -748,8 +748,8 @@ void RAFast::AllocateBasicBlock() { // and return are tail calls; do not do this for them. The tail callee need // not take the same registers as input that it produces as output, and there // are dependencies for its input registers elsewhere. - if (!MBB->empty() && MBB->back().getDesc().isReturn() && - !MBB->back().getDesc().isCall()) { + if (!MBB->empty() && MBB->back().isReturn() && + !MBB->back().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator @@ -968,7 +968,7 @@ void RAFast::AllocateBasicBlock() { } unsigned DefOpEnd = MI->getNumOperands(); - if (MCID.isCall()) { + if (MI->isCall()) { // Spill all virtregs before a call. This serves two purposes: 1. If an // exception is thrown, the landing pad is going to expect to find // registers in their spill slots, and 2. 
we don't have to wade through diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 845ee12..a053ccc 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -34,13 +34,14 @@ #include "LiveRangeEdit.h" #include "RenderMachineFunction.h" #include "Spiller.h" -#include "Splitter.h" #include "VirtRegMap.h" #include "RegisterCoalescer.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/RegAllocPBQP.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -68,11 +69,6 @@ pbqpCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); -static cl::opt<bool> -pbqpPreSplitting("pbqp-pre-splitting", - cl::desc("Pre-split before PBQP register allocation."), - cl::init(false), cl::Hidden); - namespace { /// @@ -93,7 +89,6 @@ public: initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); } @@ -444,6 +439,9 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce( void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesCFG(); + au.addRequired<AliasAnalysis>(); + au.addPreserved<AliasAnalysis>(); au.addRequired<SlotIndexes>(); au.addPreserved<SlotIndexes>(); au.addRequired<LiveIntervals>(); @@ -454,10 +452,10 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); au.addRequired<MachineLoopInfo>(); au.addPreserved<MachineLoopInfo>(); - if (pbqpPreSplitting) - au.addRequired<LoopSplitter>(); au.addRequired<VirtRegMap>(); au.addRequired<RenderMachineFunction>(); MachineFunctionPass::getAnalysisUsage(au); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 22d6a3b..cd181cd 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -651,8 +651,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) return false; - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isCommutable()) + if (!DefMI->isCommutable()) return false; // If DefMI is a two-address instruction then commuting it will change the // destination register. 
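The RegisterCoalescer hunk above follows the same mechanical pattern as most of this diff: predicate queries move from MI->getDesc().isFoo() to MI->isFoo(), so call sites stop reaching through the MCInstrDesc. A sketch of what such a forwarder reduces to, written here as a free function so it compiles on its own (the in-tree member may additionally consider bundled instructions):

  #include "llvm/CodeGen/MachineInstr.h"

  // Equivalent of the new DefMI->isCommutable() spelling: the flag
  // still lives on the MCInstrDesc; only the access path changes.
  static bool isCommutableMI(const llvm::MachineInstr &MI) {
    return MI.getDesc().isCommutable();
  }

Routing the query through MachineInstr leaves room for a bundle-aware answer later, which fits the BUNDLE handling introduced elsewhere in this diff.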
@@ -718,7 +717,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; if (NewMI != DefMI) { LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); - MBB->insert(DefMI, NewMI); + MachineBasicBlock::iterator Pos = DefMI; + MBB->insert(Pos, NewMI); MBB->erase(DefMI); } unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); @@ -809,14 +809,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (!DefMI) return false; assert(DefMI && "Defining instruction disappeared"); - const MCInstrDesc &MCID = DefMI->getDesc(); - if (!MCID.isAsCheapAsAMove()) + if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; if (!DefMI->isSafeToMove(TII, AA, SawStore)) return false; + const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; if (!DefMI->isImplicitDef()) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 34b8ab0..4418f40 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -157,7 +157,7 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && - (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier()); + (ExitMI->isCall() || ExitMI->isBarrier()); if (ExitMI && AllDepKnown) { // If it's a call or a barrier, add dependencies on the defs and uses of // instruction. @@ -238,13 +238,12 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; } - const MCInstrDesc &MCID = MI->getDesc(); - assert(!MCID.isTerminator() && !MI->isLabel() && + assert(!MI->isTerminator() && !MI->isLabel() && "Cannot schedule terminators or labels!"); // Create the SUnit for this MI. SUnit *SU = NewSUnit(MI); - SU->isCall = MCID.isCall(); - SU->isCommutable = MCID.isCommutable(); + SU->isCall = MI->isCall(); + SU->isCommutable = MI->isCommutable(); // Assign the Latency field of SU using target-provided information. if (UnitLatencies) @@ -278,8 +277,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { continue; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || - !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); + !DefSU->getInstr()->registerDefIsDead(Reg))) { + if (Kind == SDep::Anti) + DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/Reg)); + else { + unsigned AOLat = TII->getOutputLatency(InstrItins, MI, j, + DefSU->getInstr()); + DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/Reg)); + } + } } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector<SUnit *> &MemDefList = Defs[*Alias]; @@ -315,7 +321,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); if (RegUseIndex >= 0 && - (UseMCID.mayLoad() || UseMCID.mayStore()) && + (UseMI->mayLoad() || UseMI->mayStore()) && (unsigned)RegUseIndex < UseMCID.getNumOperands() && UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass()) LDataLatency += SpecialAddressLatency; @@ -419,9 +425,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. 
#define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (MCID.isCall() || MI->hasUnmodeledSideEffects() || + if (MI->isCall() || MI->hasUnmodeledSideEffects() || (MI->hasVolatileMemoryRef() && - (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) { + (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. for (std::map<const Value *, SUnit *>::iterator I = @@ -460,7 +466,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { PendingLoads.clear(); AliasMemDefs.clear(); AliasMemUses.clear(); - } else if (MCID.mayStore()) { + } else if (MI->mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { @@ -516,7 +522,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); - } else if (MCID.mayLoad()) { + } else if (MI->mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { @@ -576,7 +582,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { // Simplistic target-independent heuristic: assume that loads take // extra time. - if (SU->getInstr()->getDesc().mayLoad()) + if (SU->getInstr()->mayLoad()) SU->Latency += 2; } else { SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); @@ -658,39 +664,33 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { // EmitSchedule - Emit the machine code in scheduled order. MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { - // For MachineInstr-based scheduling, we're rescheduling the instructions in - // the block, so start by removing them from the block. - while (Begin != InsertPos) { - MachineBasicBlock::iterator I = Begin; - ++Begin; - BB->remove(I); - } + Begin = InsertPos; // If first instruction was a DBG_VALUE then put it back. if (FirstDbgValue) - BB->insert(InsertPos, FirstDbgValue); + BB->splice(InsertPos, BB, FirstDbgValue); // Then re-insert them according to the given schedule. for (unsigned i = 0, e = Sequence.size(); i != e; i++) { if (SUnit *SU = Sequence[i]) - BB->insert(InsertPos, SU->getInstr()); + BB->splice(InsertPos, BB, SU->getInstr()); else // Null SUnit* is a noop. EmitNoop(); - } - // Update the Begin iterator, as the first instruction in the block - // may have been scheduled later. - if (!Sequence.empty()) - Begin = Sequence[0]->getInstr(); + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (i == 0) + Begin = prior(InsertPos); + } // Reinsert any remaining debug_values. 
for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); MachineInstr *DbgValue = P.first; - MachineInstr *OrigPrivMI = P.second; - BB->insertAfter(OrigPrivMI, DbgValue); + MachineBasicBlock::iterator OrigPrivMI = P.second; + BB->splice(++OrigPrivMI, BB, DbgValue); } DbgValues.clear(); FirstDbgValue = NULL; diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index ff4184f..6023326 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -20,13 +20,3 @@ add_llvm_library(LLVMSelectionDAG TargetLowering.cpp TargetSelectionDAGInfo.cpp ) - -add_llvm_library_dependencies(LLVMSelectionDAG - LLVMAnalysis - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d8208a4..80cf0a8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -180,7 +180,9 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); SDValue visitCTPOP(SDNode *N); SDValue visitSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); @@ -361,6 +363,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetOptions *Options, unsigned Depth = 0) { // No compile time optimizations on this type. if (Op.getValueType() == MVT::ppcf128) @@ -383,34 +386,39 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, return LegalOperations ? 0 : 1; case ISD::FADD: // FIXME: determine better conditions for this xform. - if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, + Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, + Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
- if (!UnsafeFPMath) return 0; + if (!Options->UnsafeFPMath) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) return 1; case ISD::FMUL: case ISD::FDIV: - if (HonorSignDependentRoundingFPMath()) return 0; + if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, + Depth + 1)) return V; - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, + Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, + Depth + 1); } } @@ -434,10 +442,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -449,7 +458,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(UnsafeFPMath); + assert(DAG.getTarget().Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) @@ -462,10 +471,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!HonorSignDependentRoundingFPMath()); + assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + &DAG.getTarget().Options, Depth+1)) return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -1070,7 +1080,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRA: return visitSRA(N); case ISD::SRL: return visitSRL(N); case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); case ISD::CTPOP: return visitCTPOP(N); case ISD::SELECT: return visitSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); @@ -1769,7 +1781,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && + if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following @@ -3709,6 +3721,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if 
(isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -3719,6 +3741,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return SDValue(); +} + SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -5254,20 +5286,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); // fold (fadd A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N1CFP->getValueAPF().isZero()) return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) - if (isNegatibleForFree(N1, LegalOperations) == 2) + if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) - if (isNegatibleForFree(N0, LegalOperations) == 2) + if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && - N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); @@ -5292,17 +5326,19 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); // fold (fsub A, 0) -> A - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N0; // fold (fsub 0, B) -> -B - if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations)) + if (DAG.getTarget().Options.UnsafeFPMath && + N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations)) + if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); @@ -5329,10 +5365,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); // fold (fmul A, 0) -> 0 - if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N1CFP->getValueAPF().isZero()) return N1; // fold (fmul A, 0) -> 
0, vector edition. - if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + if (DAG.getTarget().Options.UnsafeFPMath && + ISD::isBuildVectorAllZeros(N1.getNode())) return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) @@ -5343,8 +5381,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5355,7 +5395,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && + if (DAG.getTarget().Options.UnsafeFPMath && + N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, @@ -5383,8 +5424,10 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, + &DAG.getTarget().Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, + &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -5637,7 +5680,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (isNegatibleForFree(N0, LegalOperations)) + if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading @@ -7162,19 +7205,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indicies are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - // - SDValue InsIdx = N->getOperand(1); - SDValue ExtIdx = V->getOperand(2); - - if (InsIdx == ExtIdx) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + // Only handle cases where both indexes are constants with the same type. 
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); + + if (InsIdx && ExtIdx && + InsIdx->getValueType(0).getSizeInBits() <= 64 && + ExtIdx->getValueType(0).getSizeInBits() <= 64) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) + return V->getOperand(1); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, + V->getOperand(0), N->getOperand(1)); + } } return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index cff37c2..b4946ec 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -62,8 +62,11 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by target-independent selector"); -STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by target-specific selector"); +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. @@ -307,6 +310,18 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + while (I != E) { + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + FastISel::SavePoint FastISel::enterLocalValueArea() { MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; DebugLoc OldDL = DL; @@ -792,19 +807,33 @@ FastISel::SelectInstruction(const Instruction *I) { DL = I->getDebugLoc(); + MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } + // Remove dead code. However, ignore call instructions since we've flushed + // the local value map and recomputed the insert point. + if (!isa<CallInst>(I)) { + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + } // Next, try calling the target to attempt to handle the instruction. + SavedInsertPt = FuncInfo.InsertPt; if (TargetSelectInstruction(I)) { ++NumFastIselSuccessTarget; DL = DebugLoc(); return true; } + // Check for dead code and remove as necessary. 
+ recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DL = DebugLoc(); return false; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 2ff66f8..cb6fd53 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, const TargetRegisterClass *DstRC = 0; if (IIOpNum < II->getNumOperands()) DstRC = TII->getRegClass(*II, IIOpNum, TRI); - assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) && + assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) && "Don't have operand info for this instruction!"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt index 10a849f..81d2e00 100644 --- a/lib/CodeGen/SelectionDAG/LLVMBuild.txt +++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = SelectionDAG parent = CodeGen required_libraries = Analysis CodeGen Core MC Support Target TransformUtils - diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 156cc70..75f5761 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -785,7 +785,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -2383,6 +2382,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return Op; } + case ISD::CTLZ_ZERO_UNDEF: + // This trivially expands to CTLZ. + return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { // for now, we do this: // x = x | (x >> 1); @@ -2404,6 +2406,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, Op = DAG.getNOT(dl, Op, VT); return DAG.getNode(ISD::CTPOP, dl, VT, Op); } + case ISD::CTTZ_ZERO_UNDEF: + // This trivially expands to CTTZ. + return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -2518,7 +2523,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { switch (Node->getOpcode()) { case ISD::CTPOP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; @@ -2538,7 +2545,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: - case ISD::EH_SJLJ_DISPATCHSETUP: // If the target didn't expand these, there's nothing to do, so just // preserve the chain and be done. Results.push_back(Node->getOperand(0)); @@ -3421,20 +3427,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: // Zero extend the argument. 
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - // Perform the larger operation. + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { - //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + // FIXME: This should set a bit in the zero extended value instead. Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); - } else if (Node->getOpcode() == ISD::CTLZ) { + } else if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index fd24238..1c02c4f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -56,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::Constant: Res = PromoteIntRes_Constant(N); break; case ISD::CONVERT_RNDSAT: Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; @@ -216,7 +218,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: - if (NOutVT.bitsEq(NInVT)) + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) // The input promotes to the same size. Convert the promoted value. return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; @@ -311,7 +313,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, DAG.getConstant(NVT.getSizeInBits() - @@ -329,13 +331,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - // The count is the same in the promoted type except if the original - // value was zero. This can be handled by setting the bit just off - // the top of the original type. - APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.setBit(OVT.getSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); - return DAG.getNode(ISD::CTTZ, dl, NVT, Op); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. 
+ APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.setBit(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -1097,8 +1101,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; @@ -1701,8 +1707,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, DAG.getNode(ISD::ADD, dl, NVT, LoLZ, @@ -1731,8 +1737,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); - SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo); - SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi); + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, DAG.getNode(ISD::ADD, dl, NVT, HiLZ, diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4e02b90..4696c0d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -185,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: - case ISD::CTTZ: case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::SELECT: case ISD::VSELECT: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ad83565..7ca0d1e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -441,8 +441,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: - case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index cd0da37..80162d7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -102,17 +102,6 @@ static cl::opt<unsigned> AvgIPC( "sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists.")); -#ifndef NDEBUG -namespace { - // For sched=list-ilp, Count the number of times each factor comes into play. 
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, - FactStatic, FactOther, NumFactors }; -} -static const char *FactorName[NumFactors] = -{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; -static int FactorCount[NumFactors]; -#endif //!NDEBUG - namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -157,6 +146,10 @@ private: /// and similar queries. ScheduleDAGTopologicalSort Topo; + // Hack to keep track of the inverse of FindCallSeqStart without more crazy + // DAG crawling. + DenseMap<SUnit*, SUnit*> CallSeqEndForStart; + public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, SchedulingPriorityQueue *availqueue, @@ -308,11 +301,6 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - FactorCount[i] = 0; - } -#endif //!NDEBUG CurCycle = 0; IssueCount = 0; @@ -322,6 +310,7 @@ void ScheduleDAGRRList::Schedule() { // to track the virtual resource of a calling sequence. LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + CallSeqEndForStart.clear(); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -337,11 +326,6 @@ void ScheduleDAGRRList::Schedule() { // Execute the actual scheduling loop. ListScheduleBottomUp(); -#ifndef NDEBUG - for (int i = 0; i < NumFactors; ++i) { - DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); - } -#endif // !NDEBUG AvailableQueue->releaseState(); } @@ -545,6 +529,8 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); SUnit *Def = &SUnits[N->getNodeId()]; + CallSeqEndForStart[Def] = SU; + ++NumLiveRegs; LiveRegDefs[CallResource] = Def; LiveRegGens[CallResource] = SU; @@ -811,7 +797,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { ++NumLiveRegs; LiveRegDefs[CallResource] = SU; - LiveRegGens[CallResource] = NULL; + LiveRegGens[CallResource] = CallSeqEndForStart[SU]; } } @@ -832,12 +818,11 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { + if (!LiveRegDefs[I->getReg()]) + ++NumLiveRegs; // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. LiveRegDefs[I->getReg()] = SU; - if (!LiveRegDefs[I->getReg()]) { - ++NumLiveRegs; - } if (LiveRegGens[I->getReg()] == NULL || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); @@ -2296,28 +2281,20 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) { - DEBUG(++FactorCount[FactStall]); + if (!RStall) return 1; - } - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactStall]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } - } else if (RStall) { - DEBUG(++FactorCount[FactStall]); + } else if (RStall) return -1; - } // If either node is scheduling for latency, sort them by height/depth // and latency. 
if (!checkPref || (left->SchedulingPref == Sched::ILP || right->SchedulingPref == Sched::ILP)) { if (DisableSchedCycles) { - if (LHeight != RHeight) { - DEBUG(++FactorCount[FactHeight]); + if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; - } } else { // If neither instruction stalls (!LStall && !RStall) then @@ -2326,17 +2303,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LDepth = left->getDepth() - LPenalty; int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { - DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) { - DEBUG(++FactorCount[FactOther]); + if (left->Latency != right->Latency) return left->Latency > right->Latency ? 1 : -1; - } } return 0; } @@ -2350,7 +2324,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool LHasPhysReg = left->hasPhysRegDefs; bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { - DEBUG(++FactorCount[FactRegUses]); #ifndef NDEBUG const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; #endif @@ -2376,10 +2349,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; } - if (LPriority != RPriority) { - DEBUG(++FactorCount[FactStatic]); + if (LPriority != RPriority) return LPriority > RPriority; - } // One or both of the nodes are calls and their sethi-ullman numbers are the // same, then keep source order. @@ -2412,18 +2383,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) { - DEBUG(++FactorCount[FactOther]); + if (LDist != RDist) return LDist < RDist; - } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) { - DEBUG(++FactorCount[FactOther]); + if (LScratch != RScratch) return LScratch > RScratch; - } // Comparing latency against a call makes little sense unless the node // is register pressure-neutral. @@ -2438,20 +2405,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { return result > 0; } else { - if (left->getHeight() != right->getHeight()) { - DEBUG(++FactorCount[FactHeight]); + if (left->getHeight() != right->getHeight()) return left->getHeight() > right->getHeight(); - } - if (left->getDepth() != right->getDepth()) { - DEBUG(++FactorCount[FactDepth]); + if (left->getDepth() != right->getDepth()) return left->getDepth() < right->getDepth(); - } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); - DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } @@ -2511,13 +2473,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. 
if (LHigh && !RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; @@ -2581,7 +2541,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { RPDiff = SPQ->RegPressureDiff(right, RLiveUses); } if (!DisableSchedRegPressure && LPDiff != RPDiff) { - DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); return LPDiff > RPDiff; @@ -2590,7 +2549,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { bool LReduce = canEnableCoalescing(left); bool RReduce = canEnableCoalescing(right); - DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); if (LReduce && !RReduce) return false; if (RReduce && !LReduce) return true; } @@ -2598,17 +2556,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); - DEBUG(++FactorCount[FactRegUses]); return LLiveUses < RLiveUses; } if (!DisableSchedStalls) { bool LStall = BUHasStall(left, left->getHeight(), SPQ); bool RStall = BUHasStall(right, right->getHeight(), SPQ); - if (LStall != RStall) { - DEBUG(++FactorCount[FactHeight]); + if (LStall != RStall) return left->getHeight() > right->getHeight(); - } } if (!DisableSchedCriticalPath) { @@ -2617,17 +2572,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " << left->getDepth() << " != SU(" << right->NodeNum << "): " << right->getDepth() << "\n"); - DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); } } if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { int spread = (int)left->getHeight() - (int)right->getHeight(); - if (std::abs(spread) > MaxReorderWindow) { - DEBUG(++FactorCount[FactHeight]); + if (std::abs(spread) > MaxReorderWindow) return left->getHeight() > right->getHeight(); - } } return BURRSort(left, right, SPQ); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 497c286..dd626e2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -840,9 +840,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... 
-SelectionDAG::SelectionDAG(const TargetMachine &tm) +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); @@ -1856,7 +1856,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); @@ -2334,7 +2336,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. - if (NoNaNsFPMath) + if (getTarget().Options.NoNaNsFPMath) return true; // If the value is a constant, we can obviously see if it is a NaN or not. @@ -2429,8 +2431,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::CTPOP: return getConstant(Val.countPopulation(), VT); case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: return getConstant(Val.countLeadingZeros(), VT); case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), VT); } } @@ -2607,7 +2611,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (UnsafeFPMath && OpOpcode == ISD::FSUB) + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::FNEG) // --X -> X @@ -2742,7 +2746,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) { + if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { // 0+x --> x if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) @@ -3065,7 +3069,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (UnsafeFPMath) + if (getTarget().Options.UnsafeFPMath) return N2; break; case ISD::MUL: @@ -4914,6 +4918,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } +/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// the line number information on the merged node, since it is not possible to +/// preserve the information that the operation is associated with multiple +/// lines. This will make the debugger work better at -O0, where there is a +/// higher probability of having other instructions associated with that line. +/// +SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { + DebugLoc NLoc = N->getDebugLoc(); + if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) { + N->setDebugLoc(DebugLoc()); + } + return N; +} + /// MorphNodeTo - This *mutates* the specified node to have the specified /// return type, opcode, and operands.
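The merge rule implemented by UpdadeDebugLocOnMergedSDNode above can be modeled in a few lines. This is a toy sketch of the decision only, with Loc standing in for DebugLoc and line 0 for an unknown location (not LLVM API):

// When CSE folds a new node into an existing one at -O0 and the two carry
// different source lines, the surviving node's location is cleared so the
// debugger never attributes one machine instruction to two lines.
struct Loc { unsigned Line; };   // Line == 0 plays the role of isUnknown()

Loc mergeDebugLoc(Loc Existing, Loc Incoming, bool OptNone) {
  if (Existing.Line != 0 && OptNone && Existing.Line != Incoming.Line) {
    Loc Unknown = { 0 };
    return Unknown;  // the N->setDebugLoc(DebugLoc()) case in the patch
  }
  return Existing;   // otherwise the existing location is left untouched
}

At higher optimization levels the location is deliberately kept; the special case is purely a debugging-experience tradeoff at -O0.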
/// @@ -4935,7 +4953,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) - return ON; + return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc()); } if (!RemoveNodeFromCSEMaps(N)) @@ -5139,8 +5157,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return cast<MachineSDNode>(E); + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL)); + } } // Allocate a new MachineSDNode. @@ -5943,7 +5962,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; - case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; case ISD::ConstantPool: return "ConstantPool"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; @@ -6112,10 +6130,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::TRAP: return "trap"; // Bit manipulation - case ISD::BSWAP: return "bswap"; - case ISD::CTPOP: return "ctpop"; - case ISD::CTTZ: return "cttz"; - case ISD::CTLZ: return "ctlz"; + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; @@ -6146,6 +6166,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETLT: return "setlt"; case ISD::SETLE: return "setle"; case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; } } } @@ -6554,20 +6579,15 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - // If GV has specified alignment, then use it. Otherwise, use the preferred - // alignment. - unsigned Align = GV->getAlignment(); - if (!Align) { - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { - if (GVar->hasInitializer()) { - const TargetData *TD = TLI.getTargetData(); - Align = TD->getPreferredAlignment(GVar); - } - } - if (!Align) - Align = TLI.getTargetData()->getABITypeAlignment(GV->getType()); - } - return MinAlign(Align, GVOffset); + unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + APInt AllOnes = APInt::getAllOnesValue(PtrWidth); + APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); + llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), AllOnes, + KnownZero, KnownOne, TLI.getTargetData()); + unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); } // If this is a direct reference to a stack slot, use information about the diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8d02350..68c9514 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -47,6 +47,7 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -812,9 +813,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li) { AA = &aa; GFI = gfi; + LibInfo = li; TD = DAG.getTarget().getTargetData(); LPadToCallSiteMap.clear(); } @@ -1335,6 +1338,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, Condition = getICmpCondCode(IC->getPredicate()); } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { Condition = getFCmpCondCode(FC->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); } else { Condition = ISD::SETEQ; // silence warning. llvm_unreachable("Unknown compare instruction"); @@ -2002,7 +2007,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return !DisableJumpTables && + return !TLI.getTargetMachine().Options.DisableJumpTables && (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } @@ -2625,6 +2630,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } @@ -3095,7 +3102,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { unsigned Amt = ElementSize.logBase2(); IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), N.getValueType(), IdxN, - DAG.getConstant(Amt, TLI.getPointerTy())); + DAG.getConstant(Amt, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), @@ -4775,11 +4782,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return 0; } - case Intrinsic::eh_sjlj_dispatch_setup: { - DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); - return 0; - } case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: @@ -4946,14 +4948,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? 
ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { @@ -5064,7 +5070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::trap: { - StringRef TrapFuncName = getTrapFunctionName(); + StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); return 0; @@ -5226,7 +5232,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // If there's a possibility that fast-isel has already selected some amount // of the current basic block, don't emit a tail call. - if (isTailCall && EnableFastISel) + if (isTailCall && TM.Options.EnableFastISel) isTailCall = false; std::pair<SDValue,SDValue> Result = @@ -5510,7 +5516,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // can't be a library call. if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { + if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || + (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || + (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5521,7 +5529,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { + } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || + (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || + (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType()) { @@ -5530,7 +5540,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { + } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || + (LibInfo->has(LibFunc::sinf) && Name == "sinf") || + (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5540,7 +5552,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { + } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || + (LibInfo->has(LibFunc::cosf) && Name == "cosf") || + (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5550,7 +5564,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || + (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || + (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && @@ -5560,6 +5576,83 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Tmp.getValueType(), Tmp)); return; } + } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || + (LibInfo->has(LibFunc::floorf) && Name == "floorf") || + (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || + (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || + (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || + (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || + (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || + (LibInfo->has(LibFunc::rintf) && Name == "rintf") || + (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || + (LibInfo->has(LibFunc::truncf) && Name == "truncf") || + (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || + (LibInfo->has(LibFunc::log2f) && Name == "log2f") || + (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } + } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || + (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || + (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } else if (Name == "memcmp") { if (visitMemCmpCall(I)) return; @@ -6516,10 +6609,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { /// isOnlyUsedInEntryBlock - If the specified argument is only used in the /// entry block, return true. This includes arguments used by switches, since /// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(const Argument *A) { +static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { // With FastISel active, we may be splitting blocks, so force creation // of virtual registers for all non-dead arguments. - if (EnableFastISel) + if (FastISel) return A->use_empty(); const BasicBlock *Entry = A->getParent()->begin(); @@ -6709,7 +6802,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->getCurDebugLoc()); SDB->setValue(I, Res); - if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = @@ -6719,7 +6812,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. - if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more // general. It's also subtly incompatible with the hacks FastISel @@ -6730,7 +6823,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { continue; } } - if (!isOnlyUsedInEntryBlock(I)) { + if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { FuncInfo->InitializeRegForValue(I); SDB->CopyToExportRegsIfNeeded(I); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0a21ca3..5147b6c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -67,6 +67,7 @@ class SIToFPInst; class StoreInst; class SwitchInst; class TargetData; +class TargetLibraryInfo; class TargetLowering; class TruncInst; class UIToFPInst; @@ -294,6 +295,7 @@ public: SelectionDAG &DAG; const TargetData *TD; AliasAnalysis *AA; + const TargetLibraryInfo *LibInfo; /// SwitchCases - Vector of CaseBlock structures used to communicate /// SwitchInst code generation information. 
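The visitCall hunks above repeat one guard for every libm entry point: the by-name match now also requires TargetLibraryInfo to confirm the function is available on the target, and the call must have the unary T f(T) floating-point prototype. A condensed sketch of that recurring pattern (canLowerUnaryFPCall is a hypothetical helper, not part of the patch):

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;

// Returns true if a call such as floorf(x) may be emitted as a single ISD
// node (e.g. ISD::FFLOOR): the target's library must actually provide the
// function, the callee name must match, and the types must line up exactly.
static bool canLowerUnaryFPCall(const CallInst &I,
                                const TargetLibraryInfo *LibInfo,
                                LibFunc::Func F, StringRef Name) {
  const Function *Callee = I.getCalledFunction();
  return Callee && LibInfo->has(F) && Callee->getName() == Name &&
         I.getNumArgOperands() == 1 &&
         I.getArgOperand(0)->getType()->isFloatingPointTy() &&
         I.getType() == I.getArgOperand(0)->getType();
}

When the guard holds, the call becomes the corresponding ISD node on the argument's value type; the TargetLowering changes further down default FFLOOR, FNEARBYINT, FCEIL, FRINT, and FTRUNC to Expand, so targets without a native instruction fall back to the libcall.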
@@ -338,7 +340,8 @@ public: HasTailCall(false), Context(dag.getContext()) { } - void init(GCFunctionInfo *gfi, AliasAnalysis &aa); + void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li); /// clear - Clear out the current SelectionDAG and the associated /// state and prepare this SelectionDAGBuilder object to be used diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8cecc17..3c95059 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -41,6 +41,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -61,6 +62,81 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +static cl::opt<bool> +EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, + cl::desc("Enable extra verbose messages in the \"fast\" " + "instruction selector")); + // Terminators +STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); +STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); +STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); +STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); +STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); +STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); +STATISTIC(NumFastIselFailUnwind,"Fast isel fails on Unwind"); +STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); + + // Standard binary operators... +STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add"); +STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd"); +STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub"); +STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub"); +STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul"); +STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul"); +STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv"); +STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv"); +STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv"); +STATISTIC(NumFastIselFailURem,"Fast isel fails on URem"); +STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem"); +STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem"); + + // Logical operators... +STATISTIC(NumFastIselFailAnd,"Fast isel fails on And"); +STATISTIC(NumFastIselFailOr,"Fast isel fails on Or"); +STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor"); + + // Memory instructions... +STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca"); +STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load"); +STATISTIC(NumFastIselFailStore,"Fast isel fails on Store"); +STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg"); +STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW"); +STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence"); +STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr"); + + // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc"); +STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt"); +STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt"); +STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc"); +STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt"); +STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI"); +STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI"); +STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP"); +STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP"); +STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr"); +STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt"); +STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast"); + + // Other instructions... +STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp"); +STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp"); +STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI"); +STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select"); +STATISTIC(NumFastIselFailCall,"Fast isel fails on Call"); +STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl"); +STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr"); +STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr"); +STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg"); +STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement"); +STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement"); +STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); +STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); +STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); +STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); +#endif + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -177,7 +253,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - assert(!MI->getDesc().hasPostISelHook() && + assert(!MI->hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } @@ -190,7 +266,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm)), + CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -198,6 +274,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); } SelectionDAGISel::~SelectionDAGISel() { @@ -211,6 +288,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<GCModuleInfo>(); AU.addPreserved<GCModuleInfo>(); + AU.addRequired<TargetLibraryInfo>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -256,9 +334,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // 
Do some sanity-checking on the command-line options. - assert((!EnableFastISelVerbose || EnableFastISel) && + assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); - assert((!EnableFastISelAbort || EnableFastISel) && + assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); @@ -268,6 +346,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF = &mf; RegInfo = &MF->getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); + LibInfo = &getAnalysis<TargetLibraryInfo>(); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -282,7 +361,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else FuncInfo->BPI = 0; - SDB->init(GFI, *AA); + SDB->init(GFI, *AA, LibInfo); SelectAllBasicBlocks(Fn); @@ -346,7 +425,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII.get(TargetOpcode::DBG_VALUE)) .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) .addImm(Offset).addMetadata(Variable); - EntryMBB->insertAfter(CopyUseMI, NewMI); + MachineBasicBlock::iterator Pos = CopyUseMI; + EntryMBB->insertAfter(Pos, NewMI); } } } @@ -820,10 +900,88 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } +#ifndef NDEBUG +static void collectFailStats(const Instruction *I) { + switch (I->getOpcode()) { + default: assert (0 && "<Invalid operator> "); + + // Terminators + case Instruction::Ret: NumFastIselFailRet++; return; + case Instruction::Br: NumFastIselFailBr++; return; + case Instruction::Switch: NumFastIselFailSwitch++; return; + case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; + case Instruction::Invoke: NumFastIselFailInvoke++; return; + case Instruction::Resume: NumFastIselFailResume++; return; + case Instruction::Unwind: NumFastIselFailUnwind++; return; + case Instruction::Unreachable: NumFastIselFailUnreachable++; return; + + // Standard binary operators... + case Instruction::Add: NumFastIselFailAdd++; return; + case Instruction::FAdd: NumFastIselFailFAdd++; return; + case Instruction::Sub: NumFastIselFailSub++; return; + case Instruction::FSub: NumFastIselFailFSub++; return; + case Instruction::Mul: NumFastIselFailMul++; return; + case Instruction::FMul: NumFastIselFailFMul++; return; + case Instruction::UDiv: NumFastIselFailUDiv++; return; + case Instruction::SDiv: NumFastIselFailSDiv++; return; + case Instruction::FDiv: NumFastIselFailFDiv++; return; + case Instruction::URem: NumFastIselFailURem++; return; + case Instruction::SRem: NumFastIselFailSRem++; return; + case Instruction::FRem: NumFastIselFailFRem++; return; + + // Logical operators... + case Instruction::And: NumFastIselFailAnd++; return; + case Instruction::Or: NumFastIselFailOr++; return; + case Instruction::Xor: NumFastIselFailXor++; return; + + // Memory instructions... + case Instruction::Alloca: NumFastIselFailAlloca++; return; + case Instruction::Load: NumFastIselFailLoad++; return; + case Instruction::Store: NumFastIselFailStore++; return; + case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; + case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; + case Instruction::Fence: NumFastIselFailFence++; return; + case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; + + // Convert instructions... 
+ case Instruction::Trunc: NumFastIselFailTrunc++; return; + case Instruction::ZExt: NumFastIselFailZExt++; return; + case Instruction::SExt: NumFastIselFailSExt++; return; + case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; + case Instruction::FPExt: NumFastIselFailFPExt++; return; + case Instruction::FPToUI: NumFastIselFailFPToUI++; return; + case Instruction::FPToSI: NumFastIselFailFPToSI++; return; + case Instruction::UIToFP: NumFastIselFailUIToFP++; return; + case Instruction::SIToFP: NumFastIselFailSIToFP++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; + + // Other instructions... + case Instruction::ICmp: NumFastIselFailICmp++; return; + case Instruction::FCmp: NumFastIselFailFCmp++; return; + case Instruction::PHI: NumFastIselFailPHI++; return; + case Instruction::Select: NumFastIselFailSelect++; return; + case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Shl: NumFastIselFailShl++; return; + case Instruction::LShr: NumFastIselFailLShr++; return; + case Instruction::AShr: NumFastIselFailAShr++; return; + case Instruction::VAArg: NumFastIselFailVAArg++; return; + case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; + case Instruction::InsertElement: NumFastIselFailInsertElement++; return; + case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; + case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; + case Instruction::InsertValue: NumFastIselFailInsertValue++; return; + case Instruction::LandingPad: NumFastIselFailLandingPad++; return; + } + return; +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; - if (EnableFastISel) + if (TM.Options.EnableFastISel) FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. @@ -931,6 +1089,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } +#ifndef NDEBUG + if (EnableFastISelVerbose2) + collectFailStats(Inst); +#endif + // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d7bad43..9ced1ac 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -577,16 +577,26 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setOperationAction(ISD::ConstantFP, MVT::f80, Expand); // These library functions default to expand. 
- setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10,MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10,MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -1473,9 +1483,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = InOp.getNode()->getOperand(0); EVT InnerVT = InnerOp.getValueType(); - if ((APInt::getHighBitsSet(BitWidth, - BitWidth - InnerVT.getSizeInBits()) & - DemandedMask) == 0 && + unsigned InnerBits = InnerVT.getSizeInBits(); + if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) @@ -1545,7 +1554,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) + if (NewMask == 1) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -1783,7 +1792,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. 
- if (!Op.getValueType().isVector() && + if (!TLO.LegalOperations() && + !Op.getValueType().isVector() && !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 13f269e..70fcf55 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -124,7 +124,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { } bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); + return (MBB && !MBB->empty() && MBB->back().isReturn()); } // Initialize shrink wrapping DFA sets, called before iterations. diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index c865192..8e2f74f 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -69,6 +69,8 @@ namespace { private: bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal); Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads); void lowerIncomingArguments(Function &F); void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes); @@ -138,6 +140,38 @@ static void MarkBlocksLiveIn(BasicBlock *BB, MarkBlocksLiveIn(*PI, LiveBBs); } +/// substituteLPadValues - Substitute the values returned by the landingpad +/// instruction with those returned by the personality function. +void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal) { + SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); + while (!UseWorkList.empty()) { + Value *Val = UseWorkList.pop_back_val(); + ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); + if (!EVI) continue; + if (EVI->getNumIndices() != 1) continue; + if (*EVI->idx_begin() == 0) + EVI->replaceAllUsesWith(ExnVal); + else if (*EVI->idx_begin() == 1) + EVI->replaceAllUsesWith(SelVal); + if (EVI->getNumUses() == 0) + EVI->eraseFromParent(); + } + + if (LPI->getNumUses() == 0) return; + + // There are still some uses of LPI. Construct an aggregate with the exception + // values and replace the LPI with that aggregate. + Type *LPadType = LPI->getType(); + Value *LPadVal = UndefValue::get(LPadType); + IRBuilder<> + Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); + + LPI->replaceAllUsesWith(LPadVal); +} + /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. 
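substituteLPadValues above rewrites every use of a landingpad's {exception pointer, selector} pair. The shape of that rewrite can be modeled without any LLVM API; in this toy sketch only two kinds of use exist, field projections (the extractvalue cases) and whole-pair uses (the rebuilt-aggregate case):

#include <vector>

// Projections of field 0/1 are rewired directly to the values loaded from
// the SjLj function context; the two-element aggregate is rebuilt only if a
// use still consumes the pair as a whole, mirroring the insertvalue chain
// (lpad.val) that the patch emits right after SelVal.
struct PairUse {
  int FieldIndex;            // 0, 1, or -1 for a whole-pair use
  const void *ReplacedWith;  // what the use reads after the rewrite
};

void rewriteLandingPadUses(std::vector<PairUse> &Uses, const void *ExnVal,
                           const void *SelVal, const void *RebuiltAggregate) {
  bool NeedAggregate = false;
  for (size_t i = 0; i != Uses.size(); ++i) {
    if (Uses[i].FieldIndex == 0)      Uses[i].ReplacedWith = ExnVal;
    else if (Uses[i].FieldIndex == 1) Uses[i].ReplacedWith = SelVal;
    else                              NeedAggregate = true;
  }
  if (!NeedAggregate)
    return;  // the landingpad ends up with no uses and needs no aggregate
  for (size_t i = 0; i != Uses.size(); ++i)
    if (Uses[i].FieldIndex < 0)
      Uses[i].ReplacedWith = RebuiltAggregate;
}

One detail the sketch glosses over: the real code erases each extractvalue once its uses are gone, and it skips extractvalues with more than one index.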
Value *SjLjEHPass:: @@ -189,12 +223,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext())); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); - Type *LPadType = LPI->getType(); - Value *LPadVal = UndefValue::get(LPadType); - LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); - LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); - - LPI->replaceAllUsesWith(LPadVal); + substituteLPadValues(LPI, ExnVal, SelVal); } // Personality function diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 751d604..c086073 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -80,7 +80,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); I != E;) { --I; - if (I->getDesc().isCall()) { + if (I->isCall()) { LSP.second = LIS.getInstructionIndex(I); break; } diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp deleted file mode 100644 index 16cf9b8..0000000 --- a/lib/CodeGen/Splitter.cpp +++ /dev/null @@ -1,827 +0,0 @@ -//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "loopsplitter" - -#include "Splitter.h" - -#include "llvm/Module.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -char LoopSplitter::ID = 0; -INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false) - -namespace llvm { - - class StartSlotComparator { - public: - StartSlotComparator(LiveIntervals &lis) : lis(lis) {} - bool operator()(const MachineBasicBlock *mbb1, - const MachineBasicBlock *mbb2) const { - return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2); - } - private: - LiveIntervals &lis; - }; - - class LoopSplit { - public: - LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop) - : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Cannot split physical registers."); - } - - LiveInterval& getLI() const { return li; } - - MachineLoop& getLoop() const { return loop; } - - bool isValid() const { return valid; } - - bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); } - - void invalidate() { valid = false; } - - void splitIncoming() { inSplit = true; } - - void 
splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); } - - void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); } - - void apply() { - assert(valid && "Attempt to apply invalid split."); - applyIncoming(); - applyOutgoing(); - copyRanges(); - renameInside(); - } - - private: - LoopSplitter &ls; - LiveInterval &li; - MachineLoop &loop; - bool valid, inSplit; - std::set<MachineLoop::Edge> outSplits; - std::vector<MachineInstr*> loopInstrs; - - LiveInterval *newLI; - std::map<VNInfo*, VNInfo*> vniMap; - - LiveInterval* getNewLI() { - if (newLI == 0) { - const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg); - unsigned vreg = ls.mri->createVirtualRegister(trc); - newLI = &ls.lis->getOrCreateInterval(vreg); - } - return newLI; - } - - VNInfo* getNewVNI(VNInfo *oldVNI) { - VNInfo *newVNI = vniMap[oldVNI]; - - if (newVNI == 0) { - newVNI = getNewLI()->createValueCopy(oldVNI, - ls.lis->getVNInfoAllocator()); - vniMap[oldVNI] = newVNI; - } - - return newVNI; - } - - void applyIncoming() { - if (!inSplit) { - return; - } - - MachineBasicBlock *preHeader = loop.getLoopPreheader(); - if (preHeader == 0) { - assert(ls.canInsertPreHeader(loop) && - "Can't insert required preheader."); - preHeader = &ls.insertPreHeader(loop); - } - - LiveRange *preHeaderRange = - ls.lis->findExitingRange(li, preHeader); - assert(preHeaderRange != 0 && "Range not live into preheader."); - - // Insert the new copy. - MachineInstr *copy = BuildMI(*preHeader, - preHeader->getFirstTerminator(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(getNewLI()->reg, RegState::Define) - .addReg(li.reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot(); - - VNInfo *newVal = getNewVNI(preHeaderRange->valno); - newVal->def = copyDefIdx; - newVal->setCopy(copy); - li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); - - getNewLI()->addRange(LiveRange(copyDefIdx, - ls.lis->getMBBEndIdx(preHeader), - newVal)); - } - - void applyOutgoing() { - - for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(), - osEnd = outSplits.end(); - osItr != osEnd; ++osItr) { - MachineLoop::Edge edge = *osItr; - MachineBasicBlock *outBlock = edge.second; - if (ls.isCriticalEdge(edge)) { - assert(ls.canSplitEdge(edge) && "Unsplitable critical edge."); - outBlock = &ls.splitEdge(edge, loop); - } - LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock); - assert(outRange != 0 && "No exiting range?"); - - MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(), - DebugLoc(), - ls.tii->get(TargetOpcode::COPY)) - .addReg(li.reg, RegState::Define) - .addReg(getNewLI()->reg, RegState::Kill); - - ls.lis->InsertMachineInstrInMaps(copy); - - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot(); - - // Blow away output range definition. 
- outRange->valno->def = ls.lis->getInvalidIndex(); - li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); - - SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock); - assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = - getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator()); - - getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), - copyDefIdx, newVal)); - - } - } - - void copyRange(LiveRange &lr) { - std::pair<bool, LoopSplitter::SlotPair> lsr = - ls.getLoopSubRange(lr, loop); - - if (!lsr.first) - return; - - LiveRange loopRange(lsr.second.first, lsr.second.second, - getNewVNI(lr.valno)); - - li.removeRange(loopRange.start, loopRange.end, true); - - getNewLI()->addRange(loopRange); - } - - void copyRanges() { - for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); - if (instr.modifiesRegister(li.reg, 0)) { - LiveRange *defRange = - li.getLiveRangeContaining(instrIdx.getRegSlot()); - if (defRange != 0) // May have caught this already. - copyRange(*defRange); - } - if (instr.readsRegister(li.reg, 0)) { - LiveRange *useRange = - li.getLiveRangeContaining(instrIdx.getRegSlot(true)); - if (useRange != 0) { // May have caught this already. - copyRange(*useRange); - } - } - } - - for (MachineLoop::block_iterator bbItr = loop.block_begin(), - bbEnd = loop.block_end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock &loopBlock = **bbItr; - LiveRange *enteringRange = - ls.lis->findEnteringRange(li, &loopBlock); - if (enteringRange != 0) { - copyRange(*enteringRange); - } - } - } - - void renameInside() { - for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(), - iEnd = loopInstrs.end(); - iItr != iEnd; ++iItr) { - MachineInstr &instr = **iItr; - for (unsigned i = 0; i < instr.getNumOperands(); ++i) { - MachineOperand &mop = instr.getOperand(i); - if (mop.isReg() && mop.getReg() == li.reg) { - mop.setReg(getNewLI()->reg); - } - } - } - } - - }; - - void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<MachineDominatorTree>(); - au.addPreserved<MachineDominatorTree>(); - au.addRequired<MachineLoopInfo>(); - au.addPreserved<MachineLoopInfo>(); - au.addPreservedID(RegisterCoalescerPassID); - au.addPreserved<CalculateSpillWeights>(); - au.addPreserved<LiveStacks>(); - au.addRequired<SlotIndexes>(); - au.addPreserved<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - au.addPreserved<LiveIntervals>(); - MachineFunctionPass::getAnalysisUsage(au); - } - - bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { - - mf = &fn; - mri = &mf->getRegInfo(); - tii = mf->getTarget().getInstrInfo(); - tri = mf->getTarget().getRegisterInfo(); - sis = &getAnalysis<SlotIndexes>(); - lis = &getAnalysis<LiveIntervals>(); - mli = &getAnalysis<MachineLoopInfo>(); - mdt = &getAnalysis<MachineDominatorTree>(); - - fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." 
+ - mf->getFunction()->getName().str(); - - dbgs() << "Splitting " << mf->getFunction()->getName() << "."; - - dumpOddTerminators(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - -// std::deque<MachineLoop*> loops; -// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); -// dbgs() << "Loops:\n"; -// while (!loops.empty()) { -// MachineLoop &loop = *loops.front(); -// loops.pop_front(); -// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - -// dumpLoopInfo(loop); -// } - - //lis->dump(); - //exit(0); - - // Setup initial intervals. - for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval *li = liItr->second; - - if (TargetRegisterInfo::isVirtualRegister(li->reg) && - !lis->intervalIsInOneMBB(*li)) { - intervals.push_back(li); - } - } - - processIntervals(); - - intervals.clear(); - -// dbgs() << "----------------------------------------\n"; -// lis->dump(); -// dbgs() << "----------------------------------------\n"; - - dumpOddTerminators(); - - //exit(1); - - return false; - } - - void LoopSplitter::releaseMemory() { - fqn.clear(); - intervals.clear(); - loopRangeMap.clear(); - } - - void LoopSplitter::dumpOddTerminators() { - for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); - bbItr != bbEnd; ++bbItr) { - MachineBasicBlock *mbb = &*bbItr; - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - if (tii->AnalyzeBranch(*mbb, a, b, c)) { - dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; - dbgs() << " Terminators:\n"; - for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); - iItr != iEnd; ++iItr) { - MachineInstr *instr= &*iItr; - dbgs() << " " << *instr << ""; - } - dbgs() << "\n Listed successors: [ "; - for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); - sItr != sEnd; ++sItr) { - MachineBasicBlock *succMBB = *sItr; - dbgs() << succMBB->getNumber() << " "; - } - dbgs() << "]\n\n"; - } - } - } - - void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { - MachineBasicBlock &headerBlock = *loop.getHeader(); - typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; - ExitEdgesList exitEdges; - loop.getExitEdges(exitEdges); - - dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; - for (std::vector<MachineBasicBlock*>::const_iterator - subBlockItr = loop.getBlocks().begin(), - subBlockEnd = loop.getBlocks().end(); - subBlockItr != subBlockEnd; ++subBlockItr) { - MachineBasicBlock &subBlock = **subBlockItr; - dbgs() << "BB#" << subBlock.getNumber() << " "; - } - dbgs() << "], Exit edges: [ "; - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge &exitEdge = *exitEdgeItr; - dbgs() << "(MBB#" << exitEdge.first->getNumber() - << ", MBB#" << exitEdge.second->getNumber() << ") "; - } - dbgs() << "], Sub-Loop Headers: [ "; - for (MachineLoop::iterator subLoopItr = loop.begin(), - subLoopEnd = loop.end(); - subLoopItr != subLoopEnd; ++subLoopItr) { - MachineLoop &subLoop = **subLoopItr; - MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); - dbgs() << "BB#" << subLoopBlock.getNumber() << " "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { - mbb.updateTerminator(); - - for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = 
mbb.end(); - miItr != miEnd; ++miItr) { - if (lis->isNotInMIMap(miItr)) { - lis->InsertMachineInstrInMaps(miItr); - } - } - } - - bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { - MachineBasicBlock *header = loop.getHeader(); - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - - for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), - pbEnd = header->pred_end(); - pbItr != pbEnd; ++pbItr) { - MachineBasicBlock *predBlock = *pbItr; - if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { - return false; - } - } - - MachineFunction::iterator headerItr(header); - if (headerItr == mf->begin()) - return true; - MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); - assert(headerLayoutPred != 0 && "Header should have layout pred."); - - return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { - assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); - - MachineBasicBlock &header = *loop.getHeader(); - - // Save the preds - we'll need to update them once we insert the preheader. - typedef std::set<MachineBasicBlock*> HeaderPreds; - HeaderPreds headerPreds; - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (!loop.contains(*predItr)) - headerPreds.insert(*predItr); - } - - assert(!headerPreds.empty() && "No predecessors for header?"); - - //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; - - MachineBasicBlock *preHeader = - mf->CreateMachineBasicBlock(header.getBasicBlock()); - - assert(preHeader != 0 && "Failed to create pre-header."); - - mf->insert(header, preHeader); - - for (HeaderPreds::iterator hpItr = headerPreds.begin(), - hpEnd = headerPreds.end(); - hpItr != hpEnd; ++hpItr) { - assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); - MachineBasicBlock &hp = **hpItr; - hp.ReplaceUsesOfBlockWith(&header, preHeader); - } - preHeader->addSuccessor(&header); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(preHeader)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(preHeader); - - if (MachineLoop *parentLoop = loop.getParentLoop()) { - assert(parentLoop->getHeader() != loop.getHeader() && - "Parent loop has same header?"); - parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); - - // Invalidate all parent loop ranges. - while (parentLoop != 0) { - loopRangeMap.erase(parentLoop); - parentLoop = parentLoop->getParentLoop(); - } - } - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - - // Is this safe for physregs? 
- // TargetRegisterInfo::isPhysicalRegister(li.reg) || - if (!lis->isLiveInToMBB(li, &header)) - continue; - - if (lis->isLiveInToMBB(li, preHeader)) { - assert(lis->isLiveOutOfMBB(li, preHeader) && - "Range terminates in newly added preheader?"); - continue; - } - - bool insertRange = false; - - for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), - predEnd = preHeader->pred_end(); - predItr != predEnd; ++predItr) { - MachineBasicBlock *predMBB = *predItr; - if (lis->isLiveOutOfMBB(li, predMBB)) { - insertRange = true; - break; - } - } - - if (!insertRange) - continue; - - SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), - lis->getMBBEndIdx(preHeader), - newVal)); - } - - - //dbgs() << "Dumping SlotIndexes:\n"; - //sis->dump(); - - //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n"; - - return *preHeader; - } - - bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { - assert(edge.first->succ_size() > 1 && "Non-sensical edge."); - if (edge.second->pred_size() > 1) - return true; - return false; - } - - bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { - MachineFunction::iterator outBlockItr(edge.second); - if (outBlockItr == mf->begin()) - return true; - MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); - assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); - MachineBasicBlock *a = 0, *b = 0; - SmallVector<MachineOperand, 4> c; - return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && - !tii->AnalyzeBranch(*edge.first, a, b, c)); - } - - MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, - MachineLoop &loop) { - - MachineBasicBlock &inBlock = *edge.first; - MachineBasicBlock &outBlock = *edge.second; - - assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && - "Splitting non-critical edge?"); - - //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() - // << " -> MBB#" << outBlock.getNumber() << ")..."; - - MachineBasicBlock *splitBlock = - mf->CreateMachineBasicBlock(); - - assert(splitBlock != 0 && "Failed to create split block."); - - mf->insert(&outBlock, splitBlock); - - inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); - splitBlock->addSuccessor(&outBlock); - - MachineBasicBlock *oldLayoutPred = - llvm::prior(MachineFunction::iterator(splitBlock)); - if (oldLayoutPred != 0) { - updateTerminators(*oldLayoutPred); - } - - lis->InsertMBBInMaps(splitBlock); - - loopRangeMap.erase(&loop); - - MachineLoop *splitParentLoop = loop.getParentLoop(); - while (splitParentLoop != 0 && - !splitParentLoop->contains(&outBlock)) { - splitParentLoop = splitParentLoop->getParentLoop(); - } - - if (splitParentLoop != 0) { - assert(splitParentLoop->contains(&loop) && - "Split-block parent doesn't contain original loop?"); - splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); - - // Invalidate all parent loop ranges. 
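[Note: the splitEdge body above follows the usual machine-level critical-edge recipe: create an empty block, lay it out immediately before the exit block, retarget the in-block's branches, and make the exit block the new block's lone successor. A minimal sketch of just the CFG surgery, using the same MachineFunction APIs; splitEdgeSketch is a stand-in name, not a function in this tree:

    MachineBasicBlock *splitEdgeSketch(MachineFunction &MF,
                                       MachineBasicBlock &InBlock,
                                       MachineBasicBlock &OutBlock) {
      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
      MF.insert(&OutBlock, Split);                      // lay out before OutBlock
      InBlock.ReplaceUsesOfBlockWith(&OutBlock, Split); // retarget branches
      Split->addSuccessor(&OutBlock);
      return Split;
    }

The live-interval bookkeeping that follows is the expensive part: any interval live across the edge needs a fresh range over the new block, and ranges that no longer intersect it must be trimmed.]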
- while (splitParentLoop != 0) { - loopRangeMap.erase(splitParentLoop); - splitParentLoop = splitParentLoop->getParentLoop(); - } - } - - - for (LiveIntervals::iterator liItr = lis->begin(), - liEnd = lis->end(); - liItr != liEnd; ++liItr) { - LiveInterval &li = *liItr->second; - bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && - lis->isLiveInToMBB(li, &outBlock); - if (lis->isLiveInToMBB(li, splitBlock)) { - if (!intersects) { - li.removeRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), true); - } - } else if (intersects) { - SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock); - assert(lis->getInstructionFromIndex(newDefIdx) == 0 && - "PHI def index points at actual instruction."); - VNInfo *newVal = li.getNextValue(newDefIdx, 0, - lis->getVNInfoAllocator()); - li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), - lis->getMBBEndIdx(splitBlock), - newVal)); - } - } - - //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n"; - - return *splitBlock; - } - - LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) { - typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet; - LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop); - if (lrItr == loopRangeMap.end()) { - LoopMBBSet loopMBBs((StartSlotComparator(*lis))); - std::copy(loop.block_begin(), loop.block_end(), - std::inserter(loopMBBs, loopMBBs.begin())); - - assert(!loopMBBs.empty() && "No blocks in loop?"); - - LoopRanges &loopRanges = loopRangeMap[&loop]; - assert(loopRanges.empty() && "Loop encountered but not processed?"); - SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin()); - loopRanges.push_back( - std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()), - lis->getInvalidIndex())); - for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()), - curBlockEnd = loopMBBs.end(); - curBlockItr != curBlockEnd; ++curBlockItr) { - SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr); - if (newStart != oldEnd) { - loopRanges.back().second = oldEnd; - loopRanges.push_back(std::make_pair(newStart, - lis->getInvalidIndex())); - } - oldEnd = lis->getMBBEndIdx(*curBlockItr); - } - - loopRanges.back().second = - lis->getMBBEndIdx(*llvm::prior(loopMBBs.end())); - - return loopRanges; - } - return lrItr->second; - } - - std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange( - const LiveRange &lr, - MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - LoopRanges::iterator lrItr = loopRanges.begin(), - lrEnd = loopRanges.end(); - while (lrItr != lrEnd && lr.start >= lrItr->second) { - ++lrItr; - } - - if (lrItr == lrEnd) { - SlotIndex invalid = lis->getInvalidIndex(); - return std::make_pair(false, SlotPair(invalid, invalid)); - } - - SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start); - SlotIndex srEnd(lr.end > lrItr->second ? 
lrItr->second : lr.end); - - return std::make_pair(true, SlotPair(srStart, srEnd)); - } - - void LoopSplitter::dumpLoopRanges(MachineLoop &loop) { - LoopRanges &loopRanges = getLoopRanges(loop); - dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ "; - for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end(); - lrItr != lrEnd; ++lrItr) { - dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") "; - } - dbgs() << "]\n"; - } - - void LoopSplitter::processHeader(LoopSplit &split) { - MachineBasicBlock &header = *split.getLoop().getHeader(); - //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n"; - - if (!lis->isLiveInToMBB(split.getLI(), &header)) - return; // Not live in, but nothing wrong so far. - - MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader(); - if (!preHeader) { - - if (!canInsertPreHeader(split.getLoop())) { - split.invalidate(); - return; // Couldn't insert a pre-header. Bail on this interval. - } - - for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), - predEnd = header.pred_end(); - predItr != predEnd; ++predItr) { - if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) { - split.splitIncoming(); - break; - } - } - } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) { - split.splitIncoming(); - } - } - - void LoopSplitter::processLoopExits(LoopSplit &split) { - typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList; - ExitEdgesList exitEdges; - split.getLoop().getExitEdges(exitEdges); - - //dbgs() << " Processing loop exits:\n"; - - for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), - exitEdgeEnd = exitEdges.end(); - exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { - MachineLoop::Edge exitEdge = *exitEdgeItr; - - LiveRange *outRange = - split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second)); - - if (outRange != 0) { - if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) { - split.invalidate(); - return; - } - - split.splitOutgoing(exitEdge); - } - } - } - - void LoopSplitter::processLoopUses(LoopSplit &split) { - std::set<MachineInstr*> processed; - - for (MachineRegisterInfo::reg_iterator - rItr = mri->reg_begin(split.getLI().reg), - rEnd = mri->reg_end(); - rItr != rEnd; ++rItr) { - MachineInstr &instr = *rItr; - if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) { - split.addLoopInstr(&instr); - processed.insert(&instr); - } - } - - //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg - // << " in blocks [ "; - //dbgs() << "]\n"; - } - - bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) { - assert(TargetRegisterInfo::isVirtualRegister(li.reg) && - "Attempt to split physical register."); - - LoopSplit split(*this, li, loop); - processHeader(split); - if (split.isValid()) - processLoopExits(split); - if (split.isValid()) - processLoopUses(split); - if (split.isValid() /* && split.isWorthwhile() */) { - split.apply(); - DEBUG(dbgs() << "Success.\n"); - return true; - } - DEBUG(dbgs() << "Failed.\n"); - return false; - } - - void LoopSplitter::processInterval(LiveInterval &li) { - std::deque<MachineLoop*> loops; - std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); - - while (!loops.empty()) { - MachineLoop &loop = *loops.front(); - loops.pop_front(); - DEBUG( - dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#" - << loop.getHeader()->getNumber() << " "; - ); - if (!splitOverLoop(li, loop)) { - // Couldn't split over outer loop, schedule sub-loops to be checked. 
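[Note: processInterval above visits loops outermost-first; the std::copy on the next line reschedules a loop's children only when splitting over the parent fails. The same worklist shape in isolation, where MLI and LI stand for the pass's MachineLoopInfo and the interval being split, and trySplitOverLoop is a stand-in for splitOverLoop:

    std::deque<MachineLoop*> Loops(MLI->begin(), MLI->end());
    while (!Loops.empty()) {
      MachineLoop *L = Loops.front();
      Loops.pop_front();
      if (!trySplitOverLoop(LI, *L))   // failed: try the sub-loops instead
        Loops.insert(Loops.end(), L->begin(), L->end());
    }
]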
- std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); - } - } - } - - void LoopSplitter::processIntervals() { - while (!intervals.empty()) { - LiveInterval &li = *intervals.front(); - intervals.pop_front(); - - assert(!lis->intervalIsInOneMBB(li) && - "Single interval in process worklist."); - - processInterval(li); - } - } - -} diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h deleted file mode 100644 index 9fb1b8b..0000000 --- a/lib/CodeGen/Splitter.h +++ /dev/null @@ -1,101 +0,0 @@ -//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SPLITTER_H -#define LLVM_CODEGEN_SPLITTER_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" - -#include <deque> -#include <map> -#include <string> -#include <vector> - -namespace llvm { - - class LiveInterval; - class LiveIntervals; - struct LiveRange; - class LoopSplit; - class MachineDominatorTree; - class MachineRegisterInfo; - class SlotIndexes; - class TargetInstrInfo; - class VNInfo; - - class LoopSplitter : public MachineFunctionPass { - friend class LoopSplit; - public: - static char ID; - - LoopSplitter() : MachineFunctionPass(ID) { - initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &au) const; - - virtual bool runOnMachineFunction(MachineFunction &fn); - - virtual void releaseMemory(); - - - private: - - MachineFunction *mf; - LiveIntervals *lis; - MachineLoopInfo *mli; - MachineRegisterInfo *mri; - MachineDominatorTree *mdt; - SlotIndexes *sis; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - - std::string fqn; - std::deque<LiveInterval*> intervals; - - typedef std::pair<SlotIndex, SlotIndex> SlotPair; - typedef std::vector<SlotPair> LoopRanges; - typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap; - LoopRangeMap loopRangeMap; - - void dumpLoopInfo(MachineLoop &loop); - - void dumpOddTerminators(); - - void updateTerminators(MachineBasicBlock &mbb); - - bool canInsertPreHeader(MachineLoop &loop); - MachineBasicBlock& insertPreHeader(MachineLoop &loop); - - bool isCriticalEdge(MachineLoop::Edge &edge); - bool canSplitEdge(MachineLoop::Edge &edge); - MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop); - - LoopRanges& getLoopRanges(MachineLoop &loop); - std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr, - MachineLoop &loop); - - void dumpLoopRanges(MachineLoop &loop); - - void processHeader(LoopSplit &split); - void processLoopExits(LoopSplit &split); - void processLoopUses(LoopSplit &split); - - bool splitOverLoop(LiveInterval &li, MachineLoop &loop); - - void processInterval(LiveInterval &li); - - void processIntervals(); - }; - -} - -#endif diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 3a6211a..031377b 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -553,7 +553,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool HasIndirectbr = false; if (!TailBB.empty()) - HasIndirectbr = TailBB.back().getDesc().isIndirectBranch(); + HasIndirectbr = TailBB.back().isIndirectBranch(); if (HasIndirectbr && PreRegAlloc) MaxDuplicateCount = 20; @@ 
-561,22 +561,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); - ++I) { + for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) + if (I->isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->getDesc().isReturn()) + if (PreRegAlloc && I->isReturn()) return false; // Avoid duplicating calls before register allocation. Calls presents a // barrier to register allocation so duplicating them may end up increasing // spills. - if (PreRegAlloc && I->getDesc().isCall()) + if (PreRegAlloc && I->isCall()) return false; if (!I->isPHI() && !I->isDebugValue()) @@ -611,7 +610,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { ++I; if (I == E) return true; - return I->getDesc().isUnconditionalBranch(); + return I->isUnconditionalBranch(); } static bool diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 122f869..cadb878 100644 --- a/lib/Target/TargetFrameLowering.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -1,4 +1,4 @@ -//===----- TargetFrameLowering.cpp - Implement target frame interface ------==// +//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// // // The LLVM Compiler Infrastructure // diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 08e2b16..7ed9455 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -121,6 +122,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { + assert(!MI->isBundle() && + "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; @@ -136,11 +140,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, } +bool +TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) return false; + + // Conditional branch is a special case. 
+ if (MI->isBranch() && !MI->isBarrier()) + return true; + if (!MI->isPredicable()) + return true; + return !isPredicated(MI); +} + + bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { bool MadeChange = false; + + assert(!MI->isBundle() && + "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles"); + const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return false; for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -218,7 +239,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, MachineFunction &MF) const { - assert(!Orig->getDesc().isNotDuplicable() && + assert(!Orig->isNotDuplicable() && "Instruction cannot be duplicated"); return MF.CloneMachineInstr(Orig); } @@ -288,10 +309,10 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || - NewMI->getDesc().mayStore()) && + NewMI->mayStore()) && "Folded a def to a non-store!"); assert((!(Flags & MachineMemOperand::MOLoad) || - NewMI->getDesc().mayLoad()) && + NewMI->mayLoad()) && "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); @@ -331,7 +352,7 @@ MachineInstr* TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const { - assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); + assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); @@ -382,10 +403,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; - const MCInstrDesc &MCID = MI->getDesc(); - // Avoid instructions obviously unsafe for remat. - if (MCID.isNotDuplicable() || MCID.mayStore() || + if (MI->isNotDuplicable() || MI->mayStore() || MI->hasUnmodeledSideEffects()) return false; @@ -395,7 +414,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return false; // Avoid instructions which load from potentially varying memory. - if (MCID.mayLoad() && !MI->isInvariantLoad(AA)) + if (MI->mayLoad() && !MI->isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume @@ -456,7 +475,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const{ // Terminators and labels can't be scheduled around. 
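[Note: a large share of this patch is the mechanical rewrite of MI->getDesc().isX() into MI->isX(): MachineInstr now forwards the MCInstrDesc predicate queries itself. Call sites get shorter, and, as the new bundle asserts above suggest, MachineInstr gains a single place where these queries could later learn about instruction bundles. A hypothetical filter built from the forwarding predicates, not a function in the tree:

    static bool isTriviallyMovable(const MachineInstr *MI) {
      return !MI->isCall() && !MI->isBranch() && !MI->isTerminator() &&
             !MI->mayLoad() && !MI->mayStore() &&
             !MI->hasUnmodeledSideEffects();
    }
]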
- if (MI->getDesc().isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isLabel()) return true; // Don't attempt to schedule around any instruction that defines @@ -492,3 +511,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return (ScheduleHazardRecognizer *) new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } + +int +TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + if (!DefNode->isMachineOpcode()) + return -1; + + unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); + if (!UseNode->isMachineOpcode()) + return ItinData->getOperandCycle(DefClass, DefIdx); + unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + if (!N->isMachineOpcode()) + return 1; + + return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); +} + diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index c43e5b6..7fe164a 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -536,9 +536,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : - MachOMMI.getGVStubEntry(SSym); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp new file mode 100644 index 0000000..0f59d01 --- /dev/null +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -0,0 +1,52 @@ +//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the methods in the TargetOptions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +/// DisableFramePointerElim - This returns true if frame pointer elimination +/// optimization should be disabled for the given machine function. +bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { + // Check to see if we should eliminate non-leaf frame pointers and then + // check to see if we should eliminate all frame pointers. + if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + + return NoFramePointerElim; +} + +/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option +/// is specified on the command line. 
When this flag is off(default), the +/// code generator is not allowed to generate mad (multiply add) if the +/// result is "less precise" than doing those operations individually. +bool TargetOptions::LessPreciseFPMAD() const { + return UnsafeFPMath || LessPreciseFPMADOption; +} + +/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume +/// that the rounding mode of the FPU can change from its default. +bool TargetOptions::HonorSignDependentRoundingFPMath() const { + return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; +} + +/// getTrapFunctionName - If this returns a non-empty string, this means isel +/// should lower Intrinsic::trap to a call to the specified function name +/// instead of an ISD::TRAP node. +StringRef TargetOptions::getTrapFunctionName() const { + return TrapFuncName; +} + diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index a2e8134..6a63335 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -242,7 +242,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->getDesc().isTerminator()) + KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -498,8 +498,7 @@ MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, MachineInstr *UseMI = &*UI; if (UseMI == MI || UseMI->getParent() != MBB) continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end()) + if (DistanceMap.count(UseMI)) continue; if (!UI.getOperand().isKill()) return 0; @@ -817,10 +816,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector<unsigned, 4> &Kills) { - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayStore() || MCID.isCall()) + if (MI->mayStore() || MI->isCall()) return false; - if (MCID.isTerminator() || MI->hasUnmodeledSideEffects()) + if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -918,9 +916,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, // Don't mess with copies, they may be coalesced later. return false; - const MCInstrDesc &MCID = KillMI->getDesc(); - if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() || - MCID.isTerminator()) + if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() || + KillMI->isBranch() || KillMI->isTerminator()) // Don't move pass calls, etc. return false; @@ -975,9 +972,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; ++NumVisited; - const MCInstrDesc &OMCID = OtherMI->getDesc(); - if (OMCID.hasUnmodeledSideEffects() || OMCID.isCall() || OMCID.isBranch() || - OMCID.isTerminator()) + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) // Don't move pass calls, etc. return false; for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { @@ -1119,9 +1115,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. 
return false; ++NumVisited; - const MCInstrDesc &MCID = OtherMI->getDesc(); - if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() || - MCID.isTerminator()) + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) // Don't move pass calls, etc. return false; SmallVector<unsigned, 2> OtherDefs; @@ -1201,7 +1196,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; MachineInstr &MI = *mi; - const MCInstrDesc &MCID = MI.getDesc(); unsigned regA = MI.getOperand(DstIdx).getReg(); unsigned regB = MI.getOperand(SrcIdx).getReg(); @@ -1223,7 +1217,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (MCID.isCommutable() && MI.getNumOperands() >= 3 && + if (MI.isCommutable() && MI.getNumOperands() >= 3 && TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; @@ -1261,7 +1255,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); - if (MCID.isConvertibleTo3Addr()) { + if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { @@ -1288,7 +1282,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // movq (%rax), %rcx // addq %rdx, %rcx // because it's preferable to schedule a load than a register copy. - if (MCID.mayLoad() && !regBKilled) { + if (MI.mayLoad() && !regBKilled) { // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = @@ -1531,7 +1525,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // If it's safe and profitable, remat the definition instead of // copying it. 
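[Note: TryInstructionTransform above now asks the instruction, rather than its descriptor, whether commuting is possible, then lets the target pick the operand pair. The query pattern in isolation; MI and TII are assumed in scope:

    unsigned SrcOp1 = 0, SrcOp2 = 0;
    if (MI->isCommutable() && MI->getNumOperands() >= 3 &&
        TII->findCommutedOpIndices(MI, SrcOp1, SrcOp2)) {
      // SrcOp1/SrcOp2 are the operand indices the target will swap.
    }

The remat guard that follows applies the same instruction-level queries (isAsCheapAsAMove) before rewriting a copy.]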
if (DefMI && - DefMI->getDesc().isAsCheapAsAMove() && + DefMI->isAsCheapAsAMove() && DefMI->isSafeToReMat(TII, AA, regB) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index fdffcb6..441f1e8 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -10,7 +10,3 @@ add_llvm_library(LLVMDebugInfo DWARFDebugLine.cpp DWARFFormValue.cpp ) - -add_llvm_library_dependencies(LLVMDebugInfo - LLVMSupport - ) diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt index b46d3d2..210b9f9 100644 --- a/lib/DebugInfo/LLVMBuild.txt +++ b/lib/DebugInfo/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = DebugInfo parent = Libraries required_libraries = Support - diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index fb14d41..58caae8 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -4,13 +4,6 @@ add_llvm_library(LLVMExecutionEngine TargetSelect.cpp ) -add_llvm_library_dependencies(LLVMExecutionEngine - LLVMCore - LLVMMC - LLVMSupport - LLVMTarget - ) - add_subdirectory(Interpreter) add_subdirectory(JIT) add_subdirectory(MCJIT) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 525877b..7829a29 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Host.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include <cmath> @@ -41,14 +42,12 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, @@ -420,7 +419,7 @@ ExecutionEngine *ExecutionEngine::create(Module *M, ExecutionEngine *ExecutionEngine::createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, + CodeGenOpt::Level OL, bool GVsWithCode, Reloc::Model RM, CodeModel::Model CMM) { @@ -436,11 +435,14 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, StringRef MCPU = ""; SmallVector<std::string, 1> MAttrs; + Triple TT(M->getTargetTriple()); + // TODO: permit custom TargetOptions here TargetMachine *TM = - EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr); + EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs, TargetOptions(), RM, + CMM, OL, ErrorStr); if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0; - return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM); + return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM); } ExecutionEngine *EngineBuilder::create() { @@ -465,17 +467,25 @@ ExecutionEngine *EngineBuilder::create() { // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. 
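[Note: with createJIT building its own TargetMachine through selectTarget (triple, CPU, attrs, and now TargetOptions plus the optimization level), typical clients stay on the EngineBuilder::create path below. A hedged usage sketch; M is an existing Module*:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Support/TargetSelect.h"
    #include "llvm/Support/raw_ostream.h"

    InitializeNativeTarget();
    std::string Err;
    ExecutionEngine *EE = EngineBuilder(M)
                              .setEngineKind(EngineKind::JIT)
                              .setErrorStr(&Err)
                              .setOptLevel(CodeGenOpt::Default)
                              .create();
    if (!EE)
      errs() << "JIT creation failed: " << Err << "\n";
]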
if (WhichEngine & EngineKind::JIT) { - if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, + Triple TT(M->getTargetTriple()); + if (TargetMachine *TM = EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs, + Options, RelocModel, CMModel, - ErrorStr)) { + OptLevel, ErrorStr)) { + if (!TM->getTarget().hasJIT()) { + errs() << "WARNING: This target JIT is not designed for the host" + << " you are running. If bad things happen, please choose" + << " a different -march switch.\n"; + } + if (UseMCJIT && ExecutionEngine::MCJITCtor) { ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel, + ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, AllocateGVsWithCode, TM); if (EE) return EE; } else if (ExecutionEngine::JITCtor) { ExecutionEngine *EE = - ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, + ExecutionEngine::JITCtor(M, ErrorStr, JMM, AllocateGVsWithCode, TM); if (EE) return EE; } diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt index 4fb58c2..d331f83 100644 --- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt +++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt @@ -12,14 +12,6 @@ add_llvm_library(LLVMInterpreter Interpreter.cpp ) -add_llvm_library_dependencies(LLVMInterpreter - LLVMCodeGen - LLVMCore - LLVMExecutionEngine - LLVMSupport - LLVMTarget - ) - if( LLVM_ENABLE_FFI ) target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} ) endif() diff --git a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt index 459426d..327b320 100644 --- a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt +++ b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = Interpreter parent = ExecutionEngine required_libraries = CodeGen Core ExecutionEngine Support Target - diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt index 813ccce..cefb0ae 100644 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -10,13 +10,3 @@ add_llvm_library(LLVMJIT JITMemoryManager.cpp OProfileJITEventListener.cpp ) - -add_llvm_library_dependencies(LLVMJIT - LLVMCodeGen - LLVMCore - LLVMExecutionEngine - LLVMMC - LLVMRuntimeDyld - LLVMSupport - LLVMTarget - ) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index d773009..e4f6bc4 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -206,7 +206,6 @@ void DarwinRegisterFrame(void* FrameBegin) { ExecutionEngine *JIT::createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -216,7 +215,7 @@ ExecutionEngine *JIT::createJIT(Module *M, // If the target supports JIT code generation, create the JIT. 
if (TargetJITInfo *TJ = TM->getJITInfo()) { - return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode); + return new JIT(M, *TM, *TJ, JMM, GVsWithCode); } else { if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; @@ -268,7 +267,7 @@ extern "C" { } JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode) + JITMemoryManager *JMM, bool GVsWithCode) : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) { setTargetData(TM.getTargetData()); @@ -288,7 +287,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Turn the machine code intermediate representation into bytes in memory that // may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE)) { report_fatal_error("Target does not support machine code emission!"); } @@ -341,7 +340,7 @@ void JIT::addModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE)) { report_fatal_error("Target does not support machine code emission!"); } @@ -372,7 +371,7 @@ bool JIT::removeModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE)) { report_fatal_error("Target does not support machine code emission!"); } diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index 92dcb0e..fbb9416 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -78,8 +78,7 @@ class JIT : public ExecutionEngine { JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, - bool AllocateGVsWithCode); + JITMemoryManager *JMM, bool AllocateGVsWithCode); public: ~JIT(); @@ -185,7 +184,6 @@ public: static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM); diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index 2e90968..abb70fb 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -115,7 +115,7 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) { // When trying to debug why GDB isn't getting the debug info right, it's // awfully helpful to write the object file to disk so that it can be // inspected with readelf and objdump. 
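[Note: the JITDebugRegisterer and JITEmitter hunks below read their flags from TM.Options instead of global cl::opts, so the behavior becomes a per-TargetMachine choice. A sketch of opting in, using the field names visible in those hunks:

    #include "llvm/Target/TargetOptions.h"

    TargetOptions Opts;
    Opts.JITExceptionHandling = true;     // register EH tables for JITed code
    Opts.JITEmitDebugInfo = true;         // notify the debugger of functions
    Opts.JITEmitDebugInfoToDisk = false;  // skip the /tmp object-file dumps
    // Opts would then reach the TargetMachine, e.g. through EngineBuilder.
]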
- if (JITEmitDebugInfoToDisk) { + if (TM.Options.JITEmitDebugInfoToDisk) { std::string Filename; raw_string_ostream O2(Filename); O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getName() << ".o"; diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index 8f84ac7..42a136e 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -313,7 +313,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - MayThrow |= MI->getDesc().isCall(); + MayThrow |= MI->isCall(); continue; } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 24020ee..d9fa509 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -362,10 +362,16 @@ namespace { /// Instance of the JIT JIT *TheJIT; + bool JITExceptionHandling; + + bool JITEmitDebugInfo; + public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), TheJIT(&jit) { + EmittedFunctions(this), TheJIT(&jit), + JITExceptionHandling(TM.Options.JITExceptionHandling), + JITEmitDebugInfo(TM.Options.JITEmitDebugInfo) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); @@ -1037,7 +1043,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { EmittedFunctions.erase(Emitted); } - if(JITExceptionHandling) { + if (JITExceptionHandling) { TheJIT->DeregisterTable(F); } @@ -1047,7 +1053,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { } -void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { +void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { if (BufferBegin) return JITCodeEmitter::allocateSpace(Size, Alignment); @@ -1059,7 +1065,7 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { return CurBufferPtr; } -void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { +void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { // Delegate this call through the memory manager. 
return MemMgr->allocateGlobal(Size, Alignment); } diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt index 21cb300..ca2a565 100644 --- a/lib/ExecutionEngine/JIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = JIT parent = ExecutionEngine required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target - diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt index 1ef6a44..d426969 100644 --- a/lib/ExecutionEngine/LLVMBuild.txt +++ b/lib/ExecutionEngine/LLVMBuild.txt @@ -15,9 +15,11 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = Interpreter JIT MCJIT RuntimeDyld + [component_0] type = Library name = ExecutionEngine parent = Libraries required_libraries = Core MC Support Target - diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index aae8a1b..38fdffa 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -2,11 +2,3 @@ add_llvm_library(LLVMMCJIT MCJIT.cpp Intercept.cpp ) - -add_llvm_library_dependencies(LLVMMCJIT - LLVMCore - LLVMExecutionEngine - LLVMRuntimeDyld - LLVMSupport - LLVMTarget - ) diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt index 9b08d3b..90f4d2f 100644 --- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = MCJIT parent = ExecutionEngine required_libraries = Core ExecutionEngine RuntimeDyld Support Target - diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index d5f407d..d5aaec9 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -36,7 +36,6 @@ extern "C" void LLVMLinkInMCJIT() { ExecutionEngine *MCJIT::createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -46,8 +45,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // If the target supports JIT code generation, create the JIT. 
if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel, - GVsWithCode); + return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), GVsWithCode); if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; @@ -55,8 +53,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M, } MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel, - bool AllocateGVsWithCode) + RTDyldMemoryManager *MM, bool AllocateGVsWithCode) : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { setTargetData(TM->getTargetData()); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index b64c21a..2a98fc9 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -24,8 +24,7 @@ namespace llvm { class MCJIT : public ExecutionEngine { MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel, - bool AllocateGVsWithCode); + RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode); TargetMachine *TM; MCContext *Ctx; @@ -79,7 +78,6 @@ public: static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel, bool GVsWithCode, TargetMachine *TM); diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt index c236d1d..59bdfee3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -2,8 +2,3 @@ add_llvm_library(LLVMRuntimeDyld RuntimeDyld.cpp RuntimeDyldMachO.cpp ) - -add_llvm_library_dependencies(LLVMRuntimeDyld - LLVMObject - LLVMSupport - ) diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt index 5e39814..97dc861 100644 --- a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt +++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = RuntimeDyld parent = ExecutionEngine required_libraries = Object Support - diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index 45480a6..3937fe5 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -7,33 +7,35 @@ // //===----------------------------------------------------------------------===// // -// This just asks the TargetRegistry for the appropriate JIT to use, and allows -// the user to specify a specific one on the commandline with -march=x. Clients -// should initialize targets prior to calling createJIT. +// This just asks the TargetRegistry for the appropriate target to use, and +// allows the user to specify a specific one on the commandline with -march=x, +// -mcpu=y, and -mattr=a,-b,+c. Clients should initialize targets prior to +// calling selectTarget(). // //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/Module.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" + using namespace llvm; /// selectTarget - Pick a target either via -march or by guessing the native /// arch. Add any CPU features specified via -mcpu or -mattr. 
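[Note: the signature rewritten just below drops the Module parameter; callers now pass the Triple directly, along with TargetOptions and the optimization level. The core lookup selectTarget performs, sketched; TripleStr is an assumed input string:

    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/Host.h"
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Support/raw_ostream.h"

    std::string Error;
    Triple TheTriple(TripleStr);
    if (TheTriple.getTriple().empty())
      TheTriple.setTriple(sys::getDefaultTargetTriple());
    const Target *TheTarget =
        TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
    if (!TheTarget)
      errs() << Error << "\n";
]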
-TargetMachine *EngineBuilder::selectTarget(Module *Mod, +TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, StringRef MArch, StringRef MCPU, const SmallVectorImpl<std::string>& MAttrs, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, std::string *ErrorStr) { - Triple TheTriple(Mod->getTargetTriple()); + Triple TheTriple(TargetTriple); if (TheTriple.getTriple().empty()) TheTriple.setTriple(sys::getDefaultTargetTriple()); @@ -55,7 +57,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, } // Adjust the triple to match (if known), otherwise stick with the - // module/host triple. + // requested/host triple. Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch); if (Type != Triple::UnknownArch) TheTriple.setArch(Type); @@ -69,12 +71,6 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, } } - if (!TheTarget->hasJIT()) { - errs() << "WARNING: This target JIT is not designed for the host you are" - << " running. If bad things happen, please choose a different " - << "-march switch.\n"; - } - // Package up features to be passed to target/subtarget std::string FeaturesStr; if (!MAttrs.empty()) { @@ -87,7 +83,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, // Allocate a target... TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr, - RM, CM); + Options, + RM, CM, OL); assert(Target && "Could not allocate target machine!"); return Target; } diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt index c3fa1ff..e22b8cd 100644 --- a/lib/LLVMBuild.txt +++ b/lib/LLVMBuild.txt @@ -15,8 +15,10 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore + [component_0] type = Group name = Libraries parent = $ROOT - diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt index 4d8824b..0b6d2f4 100644 --- a/lib/Linker/CMakeLists.txt +++ b/lib/Linker/CMakeLists.txt @@ -4,11 +4,3 @@ add_llvm_library(LLVMLinker LinkModules.cpp Linker.cpp ) - -add_llvm_library_dependencies(LLVMLinker - LLVMArchive - LLVMBitReader - LLVMCore - LLVMSupport - LLVMTransformUtils - ) diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt index 69f2ac4..2b4c232 100644 --- a/lib/Linker/LLVMBuild.txt +++ b/lib/Linker/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = Linker parent = Libraries required_libraries = Archive BitReader Core Support TransformUtils - diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index a4ac1bf..b2e62a5 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -45,10 +45,5 @@ add_llvm_library(LLVMMC WinCOFFStreamer.cpp ) -add_llvm_library_dependencies(LLVMMC - LLVMObject - LLVMSupport - ) - add_subdirectory(MCParser) add_subdirectory(MCDisassembler) diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index bd28069..92aad94 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -182,7 +182,7 @@ uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data, if (const MCExpr *Value = Symbol.getVariableValue()) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue, Layout)) - return (uint64_t)IntValue; + return (uint64_t)IntValue; } } @@ -1072,7 +1072,7 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm, WriteBytes(cast<MCDataFragment>(F).getContents().str()); } } else { - Asm.WriteSectionData(&SD, Layout); 
+ Asm.writeSectionData(&SD, Layout); } } @@ -1742,14 +1742,26 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, } } else { if (IsPCRel) { - switch (Modifier) { - default: - llvm_unreachable("Unimplemented"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_386_PC32; + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + + case X86::reloc_global_offset_table: + Type = ELF::R_386_GOTPC; break; - case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_386_PLT32; + + case X86::reloc_signed_4byte: + case FK_PCRel_4: + case FK_Data_4: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_386_PC32; + break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_386_PLT32; + break; + } break; } } else { @@ -1831,6 +1843,21 @@ void MipsELFObjectWriter::WriteEFlags() { ELF::EF_MIPS_ARCH_32R2); } +const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + assert(Target.getSymA() && "SymA cannot be 0."); + const MCSymbol &Sym = Target.getSymA()->getSymbol(); + + if (Sym.getSection().getKind().isMergeableCString() || + Sym.getSection().getKind().isMergeableConst()) + return &Sym; + + return NULL; +} + unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, @@ -1858,7 +1885,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case Mips::fixup_Mips_CALL16: Type = ELF::R_MIPS_CALL16; break; - case Mips::fixup_Mips_GOT16: + case Mips::fixup_Mips_GOT_Global: + case Mips::fixup_Mips_GOT_Local: Type = ELF::R_MIPS_GOT16; break; case Mips::fixup_Mips_HI16: @@ -1887,4 +1915,3 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } - diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h index 7838206..9adf0b1 100644 --- a/lib/MC/ELFObjectWriter.h +++ b/lib/MC/ELFObjectWriter.h @@ -445,6 +445,12 @@ class ELFObjectWriter : public MCObjectWriter { virtual void WriteEFlags(); protected: + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const; + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend); diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt index 8ad66b6..f35dbe4 100644 --- a/lib/MC/LLVMBuild.txt +++ b/lib/MC/LLVMBuild.txt @@ -15,9 +15,11 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = MCDisassembler MCParser + [component_0] type = Library name = MC parent = Libraries required_libraries = Object Support - diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index c330e74..b1e1bdf 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -29,7 +29,6 @@ MCAsmInfo::MCAsmInfo() { HasSubsectionsViaSymbols = false; HasMachoZeroFillDirective = false; HasMachoTBSSDirective = false; - StructorOutputOrder = Structors::ReversePriorityOrder; HasStaticCtorDtorReferenceInStaticMode = false; LinkerRequiresNonEmptyDwarfLines = false; MaxInstLength = 4; diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index 434d910..6d34801 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -38,3 +38,11 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { SupportsDataRegions = false; } + +MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() { + AllowQuotesInName = 
true; +} + +MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() { + +} diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 537d0a3..24f1243 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -39,7 +39,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. HasMachoZeroFillDirective = true; // Uses .zerofill HasMachoTBSSDirective = true; // Uses .tbss - StructorOutputOrder = Structors::PriorityOrder; HasStaticCtorDtorReferenceInStaticMode = true; CodeBegin = "L$start$code$"; @@ -57,8 +56,9 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { HiddenVisibilityAttr = MCSA_PrivateExtern; HiddenDeclarationVisibilityAttr = MCSA_Invalid; + // Doesn't support protected visibility. - ProtectedVisibilityAttr = MCSA_Global; + ProtectedVisibilityAttr = MCSA_Invalid; HasDotTypeDotSizeDirective = false; HasNoDeadStrip = true; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index d90f7b2..c785c03 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -1284,6 +1284,10 @@ void MCAsmStreamer::Finish() { if (getContext().hasDwarfFiles() && !UseLoc) MCDwarfFileTable::Emit(this); + // If we are generating dwarf for assembly source files dump out the sections. + if (getContext().getGenDwarfForAssembly()) + MCGenDwarfInfo::Emit(this); + if (!UseCFI) EmitFrames(false); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 29adbcb..c5bf6b9 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -33,7 +33,7 @@ using namespace llvm; namespace { namespace stats { STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); -STATISTIC(EvaluateFixup, "Number of evaluated fixups"); +STATISTIC(evaluateFixup, "Number of evaluated fixups"); STATISTIC(FragmentLayouts, "Number of fragment layouts"); STATISTIC(ObjectBytes, "Number of emitted object file bytes"); STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps"); @@ -136,7 +136,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const { uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { // The size is the last fragment's end offset. const MCFragment &F = SD->getFragmentList().back(); - return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F); + return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F); } uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { @@ -237,10 +237,10 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { return SD->getFragment()->getAtom(); } -bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, +bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, uint64_t &Value) const { - ++stats::EvaluateFixup; + ++stats::evaluateFixup; if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout)) report_fatal_error("expected relocatable expression"); @@ -312,7 +312,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, return IsResolved; } -uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout, +uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const { switch (F.getKind()) { case MCFragment::FT_Data: @@ -374,7 +374,7 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) { // Compute fragment offset and size. 
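[Note: MCAsmLayout::LayoutFragment, continued on the next line, maintains one invariant: a fragment's offset is its predecessor's offset plus the predecessor's size from the freshly lowercased computeFragmentSize. The same invariant written as a whole-section pass; Offsets is an assumed local map, not the layout's real lazily-updated cache:

    DenseMap<const MCFragment*, uint64_t> Offsets;
    uint64_t Off = 0;
    for (MCSectionData::iterator I = SD->begin(), E = SD->end(); I != E; ++I) {
      Offsets[&*I] = Off;                           // offset before the fragment
      Off += Asm.computeFragmentSize(Layout, *I);   // then advance past it
    }
]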
uint64_t Offset = 0; if (Prev) - Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev); + Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev); F->Offset = Offset; LastValidFragment[F->getParent()] = F; @@ -390,7 +390,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, ++stats::EmittedFragments; // FIXME: Embed in fragments instead? - uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F); + uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F); switch (F.getKind()) { case MCFragment::FT_Align: { MCAlignFragment &AF = cast<MCAlignFragment>(F); @@ -493,7 +493,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, assert(OW->getStream().tell() - Start == FragmentSize); } -void MCAssembler::WriteSectionData(const MCSectionData *SD, +void MCAssembler::writeSectionData(const MCSectionData *SD, const MCAsmLayout &Layout) const { // Ignore virtual sections. if (SD->getSection().isVirtualSection()) { @@ -546,13 +546,13 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD, } -uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout, +uint64_t MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F, const MCFixup &Fixup) { // Evaluate the fixup. MCValue Target; uint64_t FixedValue; - if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) { + if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) { // The fixup was unresolved, we need a relocation. Inform the object // writer of the relocation, and give it an opportunity to adjust the // fixup value if need be. @@ -592,7 +592,7 @@ void MCAssembler::Finish() { } // Layout until everything fits. - while (LayoutOnce(Layout)) + while (layoutOnce(Layout)) continue; DEBUG_WITH_TYPE("mc-dump", { @@ -600,7 +600,7 @@ void MCAssembler::Finish() { dump(); }); // Finalize the layout, including fragment lowering. - FinishLayout(Layout); + finishLayout(Layout); DEBUG_WITH_TYPE("mc-dump", { llvm::errs() << "assembler backend - final-layout\n--\n"; @@ -621,7 +621,7 @@ void MCAssembler::Finish() { for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(), ie3 = DF->fixup_end(); it3 != ie3; ++it3) { MCFixup &Fixup = *it3; - uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup); + uint64_t FixedValue = handleFixup(Layout, *DF, Fixup); getBackend().ApplyFixup(Fixup, DF->getContents().data(), DF->getContents().size(), FixedValue); } @@ -631,7 +631,7 @@ void MCAssembler::Finish() { for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(), ie3 = IF->fixup_end(); it3 != ie3; ++it3) { MCFixup &Fixup = *it3; - uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup); + uint64_t FixedValue = handleFixup(Layout, *IF, Fixup); getBackend().ApplyFixup(Fixup, IF->getCode().data(), IF->getCode().size(), FixedValue); } @@ -645,8 +645,8 @@ void MCAssembler::Finish() { stats::ObjectBytes += OS.tell() - StartOffset; } -bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup, - const MCFragment *DF, +bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup, + const MCInstFragment *DF, const MCAsmLayout &Layout) const { if (getRelaxAll()) return true; @@ -654,16 +654,13 @@ bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup, // If we cannot resolve the fixup value, it requires relaxation. MCValue Target; uint64_t Value; - if (!EvaluateFixup(Layout, Fixup, DF, Target, Value)) + if (!evaluateFixup(Layout, Fixup, DF, Target, Value)) return true; - // Otherwise, relax if the value is too big for a (signed) i8. 
- // - // FIXME: This is target dependent! - return int64_t(Value) != int64_t(int8_t(Value)); + return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout); } -bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF, +bool MCAssembler::fragmentNeedsRelaxation(const MCInstFragment *IF, const MCAsmLayout &Layout) const { // If this inst doesn't ever need relaxation, ignore it. This occurs when we // are intentionally pushing out inst fragments, or because we relaxed a @@ -673,15 +670,15 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF, for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(), ie = IF->fixup_end(); it != ie; ++it) - if (FixupNeedsRelaxation(*it, IF, Layout)) + if (fixupNeedsRelaxation(*it, IF, Layout)) return true; return false; } -bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout, +bool MCAssembler::relaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF) { - if (!FragmentNeedsRelaxation(&IF, Layout)) + if (!fragmentNeedsRelaxation(&IF, Layout)) return false; ++stats::RelaxedInstructions; @@ -715,7 +712,7 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout, return true; } -bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { +bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { int64_t Value = 0; uint64_t OldSize = LF.getContents().size(); bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout); @@ -732,8 +729,8 @@ bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { return OldSize != LF.getContents().size(); } -bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout, - MCDwarfLineAddrFragment &DF) { +bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, + MCDwarfLineAddrFragment &DF) { int64_t AddrDelta = 0; uint64_t OldSize = DF.getContents().size(); bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout); @@ -749,7 +746,7 @@ bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout, return OldSize != Data.size(); } -bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout, +bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout, MCDwarfCallFrameFragment &DF) { int64_t AddrDelta = 0; uint64_t OldSize = DF.getContents().size(); @@ -764,7 +761,7 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout, return OldSize != Data.size(); } -bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout, +bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) { MCFragment *FirstInvalidFragment = NULL; // Scan for fragments that need relaxation. @@ -776,19 +773,19 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout, default: break; case MCFragment::FT_Inst: - relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2)); + relaxedFrag = relaxInstruction(Layout, *cast<MCInstFragment>(it2)); break; case MCFragment::FT_Dwarf: - relaxedFrag = RelaxDwarfLineAddr(Layout, + relaxedFrag = relaxDwarfLineAddr(Layout, *cast<MCDwarfLineAddrFragment>(it2)); break; case MCFragment::FT_DwarfFrame: relaxedFrag = - RelaxDwarfCallFrameFragment(Layout, + relaxDwarfCallFrameFragment(Layout, *cast<MCDwarfCallFrameFragment>(it2)); break; case MCFragment::FT_LEB: - relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2)); + relaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(it2)); break; } // Update the layout, and remember that we relaxed. 
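The relaxation machinery above converges because each pass is monotone: fragments only ever grow. A minimal self-contained model of the layoutOnce() fixed point, with an invented short/long branch encoding standing in for the backend's fixupNeedsRelaxation() decision:

    #include <vector>

    // Toy model: each "fragment" is a branch that needs a long (5-byte)
    // encoding once its distance exceeds a signed 8-bit reach. Names and
    // sizes are illustrative, not LLVM's.
    struct Branch {
      int Target;   // index of target fragment
      bool Relaxed; // short (2-byte) vs. long (5-byte) form
    };

    static int offsetOf(const std::vector<Branch> &Code, int Index) {
      int Off = 0;
      for (int i = 0; i < Index; ++i)
        Off += Code[i].Relaxed ? 5 : 2;
      return Off;
    }

    // One pass over the section; returns true if anything grew.
    static bool relaxOnce(std::vector<Branch> &Code) {
      bool Changed = false;
      for (int i = 0, e = (int)Code.size(); i != e; ++i) {
        int Dist = offsetOf(Code, Code[i].Target) - offsetOf(Code, i + 1);
        bool NeedsLong = Dist < -128 || Dist > 127; // "fixupNeedsRelaxation"
        if (NeedsLong && !Code[i].Relaxed) {
          Code[i].Relaxed = true; // growing may push others out of range...
          Changed = true;
        }
      }
      return Changed;
    }

    int main() {
      std::vector<Branch> Code(100, Branch{99, false});
      while (relaxOnce(Code)) // ...so iterate to a fixed point, as in Finish()
        continue;
    }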
@@ -802,20 +799,20 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout, return false; } -bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { +bool MCAssembler::layoutOnce(MCAsmLayout &Layout) { ++stats::RelaxationSteps; bool WasRelaxed = false; for (iterator it = begin(), ie = end(); it != ie; ++it) { MCSectionData &SD = *it; - while(LayoutSectionOnce(Layout, SD)) + while(layoutSectionOnce(Layout, SD)) WasRelaxed = true; } return WasRelaxed; } -void MCAssembler::FinishLayout(MCAsmLayout &Layout) { +void MCAssembler::finishLayout(MCAsmLayout &Layout) { // The layout is done. Mark every fragment as valid. for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin()); diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt index 5cf5f1b..5e2cd83 100644 --- a/lib/MC/MCDisassembler/CMakeLists.txt +++ b/lib/MC/MCDisassembler/CMakeLists.txt @@ -2,12 +2,7 @@ add_llvm_library(LLVMMCDisassembler Disassembler.cpp EDDisassembler.cpp EDInst.cpp + EDMain.cpp EDOperand.cpp EDToken.cpp ) - -add_llvm_library_dependencies(LLVMMCDisassembler - LLVMMC - LLVMMCParser - LLVMSupport - ) diff --git a/lib/MC/MCDisassembler/EDMain.cpp b/lib/MC/MCDisassembler/EDMain.cpp new file mode 100644 index 0000000..3fd355b --- /dev/null +++ b/lib/MC/MCDisassembler/EDMain.cpp @@ -0,0 +1,280 @@ +//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the enhanced disassembler's public C API. +// +//===----------------------------------------------------------------------===// + +#include "EDDisassembler.h" +#include "EDInst.h" +#include "EDOperand.h" +#include "EDToken.h" +#include "llvm-c/EnhancedDisassembly.h" +using namespace llvm; + +int EDGetDisassembler(EDDisassemblerRef *disassembler, + const char *triple, + EDAssemblySyntax_t syntax) { + EDDisassembler::AssemblySyntax Syntax; + switch (syntax) { + default: assert(0 && "Unknown assembly syntax!"); + case kEDAssemblySyntaxX86Intel: + Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel; + break; + case kEDAssemblySyntaxX86ATT: + Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT; + break; + case kEDAssemblySyntaxARMUAL: + Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL; + break; + } + + EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax); + + if (!ret) + return -1; + *disassembler = ret; + return 0; +} + +int EDGetRegisterName(const char** regName, + EDDisassemblerRef disassembler, + unsigned regID) { + const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID); + if (!name) + return -1; + *regName = name; + return 0; +} + +int EDRegisterIsStackPointer(EDDisassemblerRef disassembler, + unsigned regID) { + return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0; +} + +int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler, + unsigned regID) { + return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 
1:0; +} + +unsigned int EDCreateInsts(EDInstRef *insts, + unsigned int count, + EDDisassemblerRef disassembler, + ::EDByteReaderCallback byteReader, + uint64_t address, + void *arg) { + unsigned int index; + + for (index = 0; index < count; ++index) { + EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader, + address, arg); + + if (!inst) + return index; + + insts[index] = inst; + address += inst->byteSize(); + } + + return count; +} + +void EDReleaseInst(EDInstRef inst) { + delete ((EDInst*)inst); +} + +int EDInstByteSize(EDInstRef inst) { + return ((EDInst*)inst)->byteSize(); +} + +int EDGetInstString(const char **buf, + EDInstRef inst) { + return ((EDInst*)inst)->getString(*buf); +} + +int EDInstID(unsigned *instID, EDInstRef inst) { + *instID = ((EDInst*)inst)->instID(); + return 0; +} + +int EDInstIsBranch(EDInstRef inst) { + return ((EDInst*)inst)->isBranch(); +} + +int EDInstIsMove(EDInstRef inst) { + return ((EDInst*)inst)->isMove(); +} + +int EDBranchTargetID(EDInstRef inst) { + return ((EDInst*)inst)->branchTargetID(); +} + +int EDMoveSourceID(EDInstRef inst) { + return ((EDInst*)inst)->moveSourceID(); +} + +int EDMoveTargetID(EDInstRef inst) { + return ((EDInst*)inst)->moveTargetID(); +} + +int EDNumTokens(EDInstRef inst) { + return ((EDInst*)inst)->numTokens(); +} + +int EDGetToken(EDTokenRef *token, + EDInstRef inst, + int index) { + return ((EDInst*)inst)->getToken(*(EDToken**)token, index); +} + +int EDGetTokenString(const char **buf, + EDTokenRef token) { + return ((EDToken*)token)->getString(*buf); +} + +int EDOperandIndexForToken(EDTokenRef token) { + return ((EDToken*)token)->operandID(); +} + +int EDTokenIsWhitespace(EDTokenRef token) { + return ((EDToken*)token)->type() == EDToken::kTokenWhitespace; +} + +int EDTokenIsPunctuation(EDTokenRef token) { + return ((EDToken*)token)->type() == EDToken::kTokenPunctuation; +} + +int EDTokenIsOpcode(EDTokenRef token) { + return ((EDToken*)token)->type() == EDToken::kTokenOpcode; +} + +int EDTokenIsLiteral(EDTokenRef token) { + return ((EDToken*)token)->type() == EDToken::kTokenLiteral; +} + +int EDTokenIsRegister(EDTokenRef token) { + return ((EDToken*)token)->type() == EDToken::kTokenRegister; +} + +int EDTokenIsNegativeLiteral(EDTokenRef token) { + if (((EDToken*)token)->type() != EDToken::kTokenLiteral) + return -1; + + return ((EDToken*)token)->literalSign(); +} + +int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) { + if (((EDToken*)token)->type() != EDToken::kTokenLiteral) + return -1; + + return ((EDToken*)token)->literalAbsoluteValue(*value); +} + +int EDRegisterTokenValue(unsigned *registerID, + EDTokenRef token) { + if (((EDToken*)token)->type() != EDToken::kTokenRegister) + return -1; + + return ((EDToken*)token)->registerID(*registerID); +} + +int EDNumOperands(EDInstRef inst) { + return ((EDInst*)inst)->numOperands(); +} + +int EDGetOperand(EDOperandRef *operand, + EDInstRef inst, + int index) { + return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index); +} + +int EDOperandIsRegister(EDOperandRef operand) { + return ((EDOperand*)operand)->isRegister(); +} + +int EDOperandIsImmediate(EDOperandRef operand) { + return ((EDOperand*)operand)->isImmediate(); +} + +int EDOperandIsMemory(EDOperandRef operand) { + return ((EDOperand*)operand)->isMemory(); +} + +int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) { + if (!((EDOperand*)operand)->isRegister()) + return -1; + *value = ((EDOperand*)operand)->regVal(); + return 0; +} + +int EDImmediateOperandValue(uint64_t 
*value, EDOperandRef operand) { + if (!((EDOperand*)operand)->isImmediate()) + return -1; + *value = ((EDOperand*)operand)->immediateVal(); + return 0; +} + +int EDEvaluateOperand(uint64_t *result, EDOperandRef operand, + ::EDRegisterReaderCallback regReader, void *arg) { + return ((EDOperand*)operand)->evaluate(*result, regReader, arg); +} + +#ifdef __BLOCKS__ + +struct ByteReaderWrapper { + EDByteBlock_t byteBlock; +}; + +static int readerWrapperCallback(uint8_t *byte, + uint64_t address, + void *arg) { + struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg; + return wrapper->byteBlock(byte, address); +} + +unsigned int EDBlockCreateInsts(EDInstRef *insts, + int count, + EDDisassemblerRef disassembler, + EDByteBlock_t byteBlock, + uint64_t address) { + struct ByteReaderWrapper wrapper; + wrapper.byteBlock = byteBlock; + + return EDCreateInsts(insts, + count, + disassembler, + readerWrapperCallback, + address, + (void*)&wrapper); +} + +int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand, + EDRegisterBlock_t regBlock) { + return ((EDOperand*)operand)->evaluate(*result, regBlock); +} + +int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) { + return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor); +} + +#else + +extern "C" unsigned int EDBlockCreateInsts() { + return 0; +} + +extern "C" int EDBlockEvaluateOperand() { + return -1; +} + +extern "C" int EDBlockVisitTokens() { + return -1; +} + +#endif diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index d2bbd7d..46ab65f 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,9 +19,12 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" using namespace llvm; // Given a special op, return the address skip amount (in units of @@ -423,6 +426,342 @@ void MCDwarfFile::dump() const { print(dbgs()); } +// Utility function to write a tuple for .debug_abbrev. +static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) { + MCOS->EmitULEB128IntValue(Name); + MCOS->EmitULEB128IntValue(Form); +} + +// When generating dwarf for assembly source files this emits +// the data for .debug_abbrev section which contains three DIEs. +static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); + + // DW_TAG_compile_unit DIE abbrev (1). + MCOS->EmitULEB128IntValue(1); + MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); + MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); + EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4); + EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); + EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); + StringRef DwarfDebugFlags = context.getDwarfDebugFlags(); + if (!DwarfDebugFlags.empty()) + EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string); + EmitAbbrev(MCOS, dwarf::DW_AT_producer, dwarf::DW_FORM_string); + EmitAbbrev(MCOS, dwarf::DW_AT_language, dwarf::DW_FORM_data2); + EmitAbbrev(MCOS, 0, 0); + + // DW_TAG_subprogram DIE abbrev (2). 
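Taken together, the C entry points of EDMain.cpp form a small disassembly pipeline. A usage sketch, assuming the conventional 0-for-success return values shown above; the triple and the two-byte code buffer are invented for illustration:

    #include "llvm-c/EnhancedDisassembly.h"
    #include <cstdint>
    #include <cstdio>

    static const uint8_t Bytes[] = {0x55, 0xc3}; // pushq %rbp; retq

    // Matches EDByteReaderCallback: fetch one byte at 'address'.
    static int readByte(uint8_t *byte, uint64_t address, void *) {
      if (address >= sizeof(Bytes))
        return -1;
      *byte = Bytes[address];
      return 0;
    }

    int main() {
      EDDisassemblerRef DC;
      if (EDGetDisassembler(&DC, "x86_64-unknown-linux-gnu",
                            kEDAssemblySyntaxX86ATT))
        return 1;

      EDInstRef Insts[2];
      unsigned N = EDCreateInsts(Insts, 2, DC, readByte, /*address=*/0, 0);
      for (unsigned i = 0; i != N; ++i) {
        const char *Str;
        if (!EDGetInstString(&Str, Insts[i]))
          std::printf("%s\n", Str);
        EDReleaseInst(Insts[i]);
      }
      return 0;
    }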
+ MCOS->EmitULEB128IntValue(2); + MCOS->EmitULEB128IntValue(dwarf::DW_TAG_subprogram); + MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); + EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); + EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4); + EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4); + EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag); + EmitAbbrev(MCOS, 0, 0); + + // DW_TAG_unspecified_parameters DIE abbrev (3). + MCOS->EmitULEB128IntValue(3); + MCOS->EmitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters); + MCOS->EmitIntValue(dwarf::DW_CHILDREN_no, 1); + EmitAbbrev(MCOS, 0, 0); + + // Terminate the abbreviations for this compilation unit. + MCOS->EmitIntValue(0, 1); +} + +// When generating dwarf for assembly source files this emits the data for the +// .debug_aranges section, which contains a header and a table of pairs of +// PointerSize'ed values for the address and size of section(s) with line table +// entries (just the default .text in our case) and a terminating pair of zeros. +static void EmitGenDwarfAranges(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + + // Create a symbol at the end of the section that we are creating the dwarf + // debugging info for, to use later as part of the expression that calculates + // the size of the section for the table. + MCOS->SwitchSection(context.getGenDwarfSection()); + MCSymbol *SectionEndSym = context.CreateTempSymbol(); + MCOS->EmitLabel(SectionEndSym); + context.setGenDwarfSectionEndSym(SectionEndSym); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + + // This will be the length of the .debug_aranges section; first account for + // the size of each item in the header (see below where we emit these items). + int Length = 4 + 2 + 4 + 1 + 1; + + // Figure the padding after the header before the table of address and size + // pairs whose values are PointerSize'ed. + const MCAsmInfo &asmInfo = context.getAsmInfo(); + int AddrSize = asmInfo.getPointerSize(); + int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1)); + if (Pad == 2 * AddrSize) + Pad = 0; + Length += Pad; + + // Add the size of the pair of PointerSize'ed values for the address and size + // of the one default .text section we have in the table. + Length += 2 * AddrSize; + // And the pair of terminating zeros. + Length += 2 * AddrSize; + + + // Emit the header for this section. + // The 4 byte length not including the 4 byte value for the length. + MCOS->EmitIntValue(Length - 4, 4); + // The 2 byte version, which is 2. + MCOS->EmitIntValue(2, 2); + // The 4 byte offset to the compile unit in the .debug_info from the start + // of the .debug_info; it is at the start of that section so this is zero. + MCOS->EmitIntValue(0, 4); + // The 1 byte size of an address. + MCOS->EmitIntValue(AddrSize, 1); + // The 1 byte size of a segment descriptor; we use a value of zero. + MCOS->EmitIntValue(0, 1); + // Align the header with the padding if needed, before we put out the table. + for(int i = 0; i < Pad; i++) + MCOS->EmitIntValue(0, 1); + + // Now emit the table of pairs of PointerSize'ed values for the section(s) + // address and size, in our case just the one default .text section.
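The padding computation above rounds the 12-byte header up to a multiple of 2 * AddrSize so the address/size pairs that follow are naturally aligned; the bitmask trick requires 2 * AddrSize to be a power of two. A standalone check of that arithmetic (the table emission itself continues below):

    #include <cassert>

    // The Pad computation from EmitGenDwarfAranges: round the fixed header
    // (4 + 2 + 4 + 1 + 1 = 12 bytes) up to a multiple of 2 * AddrSize.
    static int arangesPad(int Length, int AddrSize) {
      int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
      if (Pad == 2 * AddrSize)
        Pad = 0; // already aligned
      return Pad;
    }

    int main() {
      assert(arangesPad(12, 8) == 4); // 64-bit: pairs start at offset 16
      assert(arangesPad(12, 4) == 4); // 32-bit: pairs start at offset 16
      assert(arangesPad(16, 8) == 0); // an aligned length needs no pad
    }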
+ const MCExpr *Addr = MCSymbolRefExpr::Create( + context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); + const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, + *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0); + MCOS->EmitAbsValue(Addr, AddrSize); + MCOS->EmitAbsValue(Size, AddrSize); + + // And finally the pair of terminating zeros. + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitIntValue(0, AddrSize); +} + +// When generating dwarf for assembly source files this emits the data for +// .debug_info section which contains three parts. The header, the compile_unit +// DIE and a list of subprogram DIEs. +static void EmitGenDwarfInfo(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); + + // Create a symbol at the start and end of this section used in here for the + // expression to calculate the length in the header. + MCSymbol *InfoStart = context.CreateTempSymbol(); + MCOS->EmitLabel(InfoStart); + MCSymbol *InfoEnd = context.CreateTempSymbol(); + + // First part: the header. + + // The 4 byte total length of the information for this compilation unit, not + // including these 4 bytes. + const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4); + MCOS->EmitAbsValue(Length, 4); + + // The 2 byte DWARF version, which is 2. + MCOS->EmitIntValue(2, 2); + + // The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev, + // it is at the start of that section so this is zero. + MCOS->EmitIntValue(0, 4); + + const MCAsmInfo &asmInfo = context.getAsmInfo(); + int AddrSize = asmInfo.getPointerSize(); + // The 1 byte size of an address. + MCOS->EmitIntValue(AddrSize, 1); + + // Second part: the compile_unit DIE. + + // The DW_TAG_compile_unit DIE abbrev (1). + MCOS->EmitULEB128IntValue(1); + + // DW_AT_stmt_list, a 4 byte offset from the start of the .debug_line section, + // which is at the start of that section so this is zero. + MCOS->EmitIntValue(0, 4); + + // AT_low_pc, the first address of the default .text section. + const MCExpr *Start = MCSymbolRefExpr::Create( + context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); + MCOS->EmitAbsValue(Start, AddrSize); + + // AT_high_pc, the last address of the default .text section. + const MCExpr *End = MCSymbolRefExpr::Create( + context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context); + MCOS->EmitAbsValue(End, AddrSize); + + // AT_name, the name of the source file. Reconstruct from the first directory + // and file table entries. + const std::vector<StringRef> &MCDwarfDirs = + context.getMCDwarfDirs(); + if (MCDwarfDirs.size() > 0) { + MCOS->EmitBytes(MCDwarfDirs[0], 0); + MCOS->EmitBytes("/", 0); + } + const std::vector<MCDwarfFile *> &MCDwarfFiles = + MCOS->getContext().getMCDwarfFiles(); + MCOS->EmitBytes(MCDwarfFiles[1]->getName(), 0); + MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + + // AT_comp_dir, the working directory the assembly was done in. + llvm::sys::Path CWD = llvm::sys::Path::GetCurrentDirectory(); + MCOS->EmitBytes(StringRef(CWD.c_str()), 0); + MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + + // AT_APPLE_flags, the command line arguments of the assembler tool. + StringRef DwarfDebugFlags = context.getDwarfDebugFlags(); + if (!DwarfDebugFlags.empty()){ + MCOS->EmitBytes(DwarfDebugFlags, 0); + MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + } + + // AT_producer, the version of the assembler tool. 
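The InfoStart/InfoEnd pair above implements the usual DWARF length idiom: the 4-byte unit length excludes the length field itself, hence MakeStartMinusEndExpr(..., 4). A sketch of the same idiom over a plain byte buffer (little-endian byte order is assumed for the backpatch; the payload bytes are invented):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Reserve the 4-byte length, emit the unit, then backpatch
    // length = (end - start) - 4 so the field excludes itself.
    int main() {
      std::vector<uint8_t> Section;

      size_t Start = Section.size();      // "InfoStart"
      Section.resize(Section.size() + 4); // placeholder for the length

      // ... the unit's payload: version, abbrev offset, DIEs ...
      Section.push_back(2); Section.push_back(0); // DWARF version 2
      Section.insert(Section.end(), 5, 0);        // dummy payload

      size_t End = Section.size();        // "InfoEnd"
      uint32_t Length = uint32_t(End - Start) - 4;
      std::memcpy(&Section[Start], &Length, sizeof(Length));

      assert(Length == 7); // 2 (version) + 5 (payload), field excluded
    }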
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "), 0); + MCOS->EmitBytes(StringRef(PACKAGE_VERSION), 0); + MCOS->EmitBytes(StringRef(")"), 0); + MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + + // AT_language, a 4 byte value. We use DW_LANG_Mips_Assembler as the dwarf2 + // draft has no standard code for assembler. + MCOS->EmitIntValue(dwarf::DW_LANG_Mips_Assembler, 2); + + // Third part: the list of subprogram DIEs. + + // Loop on saved info for dwarf subprograms and create the DIEs for them. + const std::vector<const MCGenDwarfSubprogramEntry *> &Entries = + MCOS->getContext().getMCGenDwarfSubprogramEntries(); + for (std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator it = + Entries.begin(), ie = Entries.end(); it != ie; + ++it) { + const MCGenDwarfSubprogramEntry *Entry = *it; + + // The DW_TAG_subprogram DIE abbrev (2). + MCOS->EmitULEB128IntValue(2); + + // AT_name, of the label without any leading underbar. + MCOS->EmitBytes(Entry->getName(), 0); + MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + + // AT_decl_file, index into the file table. + MCOS->EmitIntValue(Entry->getFileNumber(), 4); + + // AT_decl_line, source line number. + MCOS->EmitIntValue(Entry->getLineNumber(), 4); + + // AT_low_pc, start address of the label. + const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(), + MCSymbolRefExpr::VK_None, context); + MCOS->EmitAbsValue(AT_low_pc, AddrSize); + + // AT_high_pc, end address which is the next label or end of the section. + std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator next = it+1; + if (next != Entries.end()){ + const MCGenDwarfSubprogramEntry *NextEntry = *next; + const MCExpr *AT_high_pc = MCSymbolRefExpr::Create(NextEntry->getLabel(), + MCSymbolRefExpr::VK_None, context); + MCOS->EmitAbsValue(AT_high_pc, AddrSize); + } else { + MCOS->EmitAbsValue(End, AddrSize); + } + + // DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype. + MCOS->EmitIntValue(0, 1); + + // The DW_TAG_unspecified_parameters DIE abbrev (3). + MCOS->EmitULEB128IntValue(3); + + // Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIE's. + MCOS->EmitIntValue(0, 1); + } + // Deallocate the MCGenDwarfSubprogramEntry classes that saved away the info + // for the dwarf subprograms. + for (std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator it = + Entries.begin(), ie = Entries.end(); it != ie; + ++it) { + const MCGenDwarfSubprogramEntry *Entry = *it; + delete Entry; + } + + // Add the NULL DIE terminating the Compile Unit DIE's. + MCOS->EmitIntValue(0, 1); + + // Now set the value of the symbol at the end of the info section. + MCOS->EmitLabel(InfoEnd); +} + +// +// When generating dwarf for assembly source files this emits the Dwarf +// sections. +// +void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { + // Create the dwarf sections in this order (.debug_line already created). + MCContext &context = MCOS->getContext(); + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection()); + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + + // If there are no line table entries then do not emit any section contents. + if (context.getMCLineSections().empty()) + return; + + // Output the data for .debug_aranges section. + EmitGenDwarfAranges(MCOS); + + // Output the data for .debug_abbrev section. + EmitGenDwarfAbbrev(MCOS); + + // Output the data for .debug_info section. 
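In the subprogram loop below, each DIE's AT_high_pc is taken to be the next entry's label, or the section end for the last entry. The same rule as a standalone sketch, with invented label names and addresses:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Each label's range ends where the next label starts; the last
    // label's range ends at the end of the section.
    struct Entry { const char *Name; uint64_t LowPC; };

    int main() {
      std::vector<Entry> Entries = {{"start", 0x0}, {"loop", 0x10},
                                    {"done", 0x24}};
      uint64_t SectionEnd = 0x40;

      for (size_t i = 0, e = Entries.size(); i != e; ++i) {
        uint64_t HighPC = (i + 1 != e) ? Entries[i + 1].LowPC : SectionEnd;
        std::printf("%s: [%#llx, %#llx)\n", Entries[i].Name,
                    (unsigned long long)Entries[i].LowPC,
                    (unsigned long long)HighPC);
      }
    }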
+ EmitGenDwarfInfo(MCOS); +} + +// +// When generating dwarf for assembly source files this is called when the +// symbol for a label is created. If this symbol is not a temporary and is in +// the section that dwarf is being generated for, save the needed info to +// create a dwarf subprogram. +// +void MCGenDwarfSubprogramEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, + SourceMgr &SrcMgr, SMLoc &Loc) { + // We won't create dwarf subprograms for temporary symbols or symbols not in + // the default text. + if (Symbol->isTemporary()) + return; + MCContext &context = MCOS->getContext(); + if (context.getGenDwarfSection() != MCOS->getCurrentSection()) + return; + + // The dwarf subprogram's name does not have the symbol name's leading + // underbar if any. + StringRef Name = Symbol->getName(); + if (Name.startswith("_")) + Name = Name.substr(1, Name.size()-1); + + // Get the dwarf file number to be used for the dwarf subprogram. + unsigned FileNumber = context.getGenDwarfFileNumber(); + + // Finding the line number is the expensive part, which is why we don't just + // pass it in: for some symbols we won't create a dwarf subprogram at all. + int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc); + unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer); + + // We create a temporary symbol for use for the AT_high_pc and AT_low_pc + // values so that they don't have things like an ARM thumb bit from the + // original symbol. So when used they won't get a low bit set after + // relocation. + MCSymbol *Label = context.CreateTempSymbol(); + MCOS->EmitLabel(Label); + + // Create an entry for the info and add it to the other entries. + MCGenDwarfSubprogramEntry *Entry = + new MCGenDwarfSubprogramEntry(Name, FileNumber, LineNumber, Label); + MCOS->getContext().addMCGenDwarfSubprogramEntry(Entry); +} + static int getDataAlignmentFactor(MCStreamer &streamer) { MCContext &context = streamer.getContext(); const MCAsmInfo &asmInfo = context.getAsmInfo(); @@ -1009,10 +1348,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos(); // Emit the compact unwind info if available. - // FIXME: This emits both the compact unwind and the old CIE/FDE - // information. Only one of those is needed. - // FIXME: Disable. This seems to still be causing failures.
- if (false && IsEH && MOFI->getCompactUnwindSection()) + if (IsEH && MOFI->getCompactUnwindSection()) for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); if (Frame.CompactUnwindEncoding) diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp index dad2e7b..f9f98e0 100644 --- a/lib/MC/MCELF.cpp +++ b/lib/MC/MCELF.cpp @@ -37,7 +37,7 @@ void MCELF::SetType(MCSymbolData &SD, unsigned Type) { assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS); + Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC); uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); @@ -48,7 +48,7 @@ unsigned MCELF::GetType(const MCSymbolData &SD) { assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS); + Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC); return Type; } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 0ea3c64..dcc4666 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -130,7 +130,6 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_WeakDefinition: case MCSA_WeakDefAutoPrivate: case MCSA_Invalid: - case MCSA_ELF_TypeIndFunction: case MCSA_IndirectSymbol: assert(0 && "Invalid symbol attribute for ELF!"); break; @@ -162,6 +161,10 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCELF::SetType(SD, ELF::STT_FUNC); break; + case MCSA_ELF_TypeIndFunction: + MCELF::SetType(SD, ELF::STT_GNU_IFUNC); + break; + case MCSA_ELF_TypeObject: MCELF::SetType(SD, ELF::STT_OBJECT); break; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index aa35815..50ab1f8 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -156,8 +156,6 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { } void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) { - // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa. - // Remember that the function is a thumb function. Fixup and relocation // values will need to be adjusted.
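MCELF::SetType() above packs the 4-bit symbol type into the flags word with a clear-then-or, and the new STT_GNU_IFUNC value flows through that same path. A self-contained model of the bitfield pattern (the shift value here is arbitrary; only the mask-and-or idiom matters):

    #include <cassert>
    #include <cstdint>

    static const unsigned Shift = 8; // illustrative, not ELF_STT_Shift

    static uint32_t setType(uint32_t Flags, unsigned Type) {
      uint32_t Other = Flags & ~(0xfu << Shift); // clear the old 4-bit field
      return Other | (Type << Shift);            // install the new type
    }

    static unsigned getType(uint32_t Flags) {
      return (Flags >> Shift) & 0xf;
    }

    int main() {
      const unsigned STT_FUNC = 2, STT_GNU_IFUNC = 10; // ELF constants
      uint32_t Flags = 0xA50F;                         // unrelated bits set
      Flags = setType(Flags, STT_FUNC);
      assert(getType(Flags) == STT_FUNC);
      Flags = setType(Flags, STT_GNU_IFUNC);           // newly allowed above
      assert(getType(Flags) == STT_GNU_IFUNC);
      // The surrounding flag bits survive both updates.
      assert((Flags & ~(0xfu << Shift)) == (0xA50Fu & ~(0xfu << Shift)));
    }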
getAssembler().setIsThumbFunc(Symbol); diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 7d23541..32ba924 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -31,6 +31,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) CommDirectiveSupportsAlignment = false; + StructorOutputOrder = Structors::PriorityOrder; + TextSection // .text = Ctx->getMachOSection("__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, @@ -258,6 +260,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { } } + StructorOutputOrder = Structors::ReversePriorityOrder; + // ELF BSSSection = Ctx->getELFSection(".bss", ELF::SHT_NOBITS, @@ -385,6 +389,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { // COFF + StructorOutputOrder = Structors::ReversePriorityOrder; + TextSection = Ctx->getCOFFSection(".text", COFF::IMAGE_SCN_CNT_CODE | diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 90c957f..663d0ca 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -260,5 +260,9 @@ void MCObjectStreamer::Finish() { if (getContext().hasDwarfFiles()) MCDwarfFileTable::Emit(this); + // If we are generating dwarf for assembly source files dump out the sections. + if (getContext().getGenDwarfForAssembly()) + MCGenDwarfInfo::Emit(this); + getAssembler().Finish(); } diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 7883893..aac020d 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -181,6 +181,9 @@ private: /// EnterIncludeFile - Enter the specified file. This returns true on failure. bool EnterIncludeFile(const std::string &Filename); + /// ProcessIncbinFile - Process the specified file for the .incbin directive. + /// This returns true on failure. + bool ProcessIncbinFile(const std::string &Filename); /// \brief Reset the current lexer position to that given by \arg Loc. The /// current token is not set; clients should ensure Lex() is called @@ -227,6 +230,7 @@ private: bool ParseDirectiveAbort(); // ".abort" bool ParseDirectiveInclude(); // ".include" + bool ParseDirectiveIncbin(); // ".incbin" bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if" // ".ifdef" or ".ifndef", depending on expect_defined @@ -429,6 +433,21 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) { return false; } +/// Process the specified .incbin file by searching for it in the include paths +/// then just emitting the byte contents of the file to the streamer. This +/// returns true on failure. +bool AsmParser::ProcessIncbinFile(const std::string &Filename) { + std::string IncludedFile; + int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile); + if (NewBuf == -1) + return true; + + // Pick up the bytes from the file and emit them. + getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer(), + DEFAULT_ADDRSPACE); + return false; +} + void AsmParser::JumpToLoc(SMLoc Loc) { CurBuffer = SrcMgr.FindBufferContainingLoc(Loc); Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer()); @@ -468,6 +487,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // section and generate a .file directive.
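ProcessIncbinFile() above resolves the named file and streams its raw bytes into the output. A rough standalone equivalent using plain iostreams instead of SourceMgr; the filename is invented and the true-on-failure convention mirrors the directive handlers, but this is a sketch, not LLVM's implementation:

    #include <fstream>
    #include <string>
    #include <vector>

    // Locate the file, read its raw bytes, append them verbatim to the
    // output buffer. Returns true on failure.
    static bool processIncbin(const std::string &Filename,
                              std::vector<char> &Output) {
      std::ifstream In(Filename.c_str(), std::ios::binary);
      if (!In)
        return true; // could not find/open the file

      // Pick up the bytes from the file and emit them.
      char Byte;
      while (In.get(Byte))
        Output.push_back(Byte);
      return false;
    }

    int main() {
      std::vector<char> Section;
      // Models a directive like: .incbin "blob.bin"
      if (processIncbin("blob.bin", Section))
        return 1; // "Could not find incbin file ..."
      return 0;
    }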
if (getContext().getGenDwarfForAssembly()) { getContext().setGenDwarfSection(getStreamer().getCurrentSection()); + MCSymbol *SectionStartSym = getContext().CreateTempSymbol(); + getStreamer().EmitLabel(SectionStartSym); + getContext().setGenDwarfSectionStartSym(SectionStartSym); getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(), StringRef(), SrcMgr.getMemoryBuffer(CurBuffer)->getBufferIdentifier()); } @@ -1047,6 +1069,12 @@ bool AsmParser::ParseStatement() { // Emit the label. Out.EmitLabel(Sym); + // If we are generating dwarf for assembly source files then gather the + // info to make a dwarf subprogram entry for this label if needed. + if (getContext().getGenDwarfForAssembly()) + MCGenDwarfSubprogramEntry::Make(Sym, &getStreamer(), getSourceManager(), + IDLoc); + // Consume any end of statement token, if present, to avoid spurious // AddBlankLine calls(). if (Lexer.is(AsmToken::EndOfStatement)) { @@ -1174,6 +1202,8 @@ bool AsmParser::ParseStatement() { return ParseDirectiveAbort(); if (IDVal == ".include") return ParseDirectiveInclude(); + if (IDVal == ".incbin") + return ParseDirectiveIncbin(); if (IDVal == ".code16") return TokError(Twine(IDVal) + " not supported yet"); @@ -2197,6 +2227,31 @@ bool AsmParser::ParseDirectiveInclude() { return false; } +/// ParseDirectiveIncbin +/// ::= .incbin "filename" +bool AsmParser::ParseDirectiveIncbin() { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.incbin' directive"); + + std::string Filename = getTok().getString(); + SMLoc IncbinLoc = getLexer().getLoc(); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.incbin' directive"); + + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // Attempt to process the included file. 
+ if (ProcessIncbinFile(Filename)) { + Error(IncbinLoc, "Could not find incbin file '" + Filename + "'"); + return true; + } + + return false; +} + /// ParseDirectiveIf /// ::= .if expression bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt index 299d281..222f237 100644 --- a/lib/MC/MCParser/CMakeLists.txt +++ b/lib/MC/MCParser/CMakeLists.txt @@ -9,8 +9,3 @@ add_llvm_library(LLVMMCParser MCAsmParserExtension.cpp MCTargetAsmParser.cpp ) - -add_llvm_library_dependencies(LLVMMCParser - LLVMMC - LLVMSupport - ) diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index d891126..ffc400b 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -476,6 +476,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { .Case("common", MCSA_ELF_TypeCommon) .Case("notype", MCSA_ELF_TypeNoType) .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) + .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction) .Default(MCSA_Invalid); if (Attr == MCSA_Invalid) diff --git a/lib/MC/MCParser/LLVMBuild.txt b/lib/MC/MCParser/LLVMBuild.txt index 83146a9..bcb0feb 100644 --- a/lib/MC/MCParser/LLVMBuild.txt +++ b/lib/MC/MCParser/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = MCParser parent = MC required_libraries = MC Support - diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index a9219ad..e016f09 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -584,9 +584,14 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, // requires the compiler to use .set to absolutize the differences between // symbols which the compiler knows to be assembly time constants, so we // don't need to worry about considering symbol differences fully resolved. + // + // If the file isn't using sub-sections-via-symbols, we can make the + // same assumptions about any symbol that we normally make about + // assembler locals. if (!Asm.getBackend().hasReliableSymbolDifference()) { - if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB) + if ((!SA.isTemporary() && Asm.getSubsectionsViaSymbols()) || + !SA.isInSection() || &SecA != &SecB) return false; return true; } @@ -628,7 +633,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, } void MachObjectWriter::WriteObject(MCAssembler &Asm, - const MCAsmLayout &Layout) { + const MCAsmLayout &Layout) { unsigned NumSections = Asm.size(); // The section data starts after the header, the segment load command (and @@ -731,7 +736,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, // Write the actual section data. 
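The relaxed MachObjectWriter test above says A - B folds at assembly time when both symbols sit in the same section and A can be treated as an assembler local, which, without subsections-via-symbols, is now any symbol. The predicate, modeled standalone:

    #include <cassert>

    // Mirrors the condition in IsSymbolRefDifferenceFullyResolvedImpl for
    // backends without reliable symbol differences.
    static bool differenceFullyResolved(bool SameSection, bool ATemporary,
                                        bool SubsectionsViaSymbols) {
      if (!SameSection)
        return false;                // cross-section differences never fold
      if (ATemporary)
        return true;                 // assembler locals are always safe
      return !SubsectionsViaSymbols; // otherwise only without subsections
    }

    int main() {
      assert(differenceFullyResolved(true, false, false));  // newly allowed
      assert(!differenceFullyResolved(true, false, true));  // old behavior
      assert(!differenceFullyResolved(false, true, false)); // cross-section
    }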
for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { - Asm.WriteSectionData(it, Layout); + Asm.writeSectionData(it, Layout); uint64_t Pad = getPaddingSize(it, Layout); for (unsigned int i = 0; i < Pad; ++i) diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 4d3b59c..4052374 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -850,7 +850,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, assert(OS.tell() == (*i)->Header.PointerToRawData && "Section::PointerToRawData is insane!"); - Asm.WriteSectionData(j, Layout); + Asm.writeSectionData(j, Layout); } if ((*i)->Relocations.size() > 0) { diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 86eb51a..c20fc0c 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -9,8 +9,3 @@ add_llvm_library(LLVMObject Object.cpp ObjectFile.cpp ) - -add_llvm_library_dependencies(LLVMObject - LLVMCore - LLVMSupport - ) diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index c6ce562..bdf5431 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -101,7 +101,7 @@ error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb, return getSymbolName(symb, Result); } -error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb, +error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb, uint64_t &Result) const { const coff_symbol *symb = toSymb(Symb); const coff_section *Section = NULL; @@ -113,7 +113,7 @@ error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb, if (Type == 'U' || Type == 'w') Result = UnknownAddressOrSize; else if (Section) - Result = Section->VirtualAddress + symb->Value; + Result = Section->PointerToRawData + symb->Value; else Result = symb->Value; return object_error::success; @@ -131,11 +131,9 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb, if (Type == 'U' || Type == 'w') Result = UnknownAddressOrSize; else if (Section) - Result = reinterpret_cast<uintptr_t>(base() + - Section->PointerToRawData + - symb->Value); + Result = Section->VirtualAddress + symb->Value; else - Result = reinterpret_cast<uintptr_t>(base() + symb->Value); + Result = symb->Value; return object_error::success; } @@ -283,7 +281,7 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb, if (symb->SectionNumber <= COFF::IMAGE_SYM_UNDEFINED) Result = end_sections(); else { - const coff_section *sec; + const coff_section *sec = 0; if (error_code ec = getSection(symb->SectionNumber, sec)) return ec; DataRefImpl Sec; std::memset(&Sec, 0, sizeof(Sec)); @@ -389,7 +387,7 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec, bool &Result) const { const coff_section *sec = toSec(Sec); const coff_symbol *symb = toSymb(Symb); - const coff_section *symb_sec; + const coff_section *symb_sec = 0; if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec; if (symb_sec == sec) Result = true; @@ -624,6 +622,11 @@ error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel, Res = toRel(Rel)->VirtualAddress; return object_error::success; } +error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const { + Res = toRel(Rel)->VirtualAddress; + return object_error::success; +} error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const { const coff_relocation* R = toRel(Rel); diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index d1a43e7..a6c4c25 100644 --- 
a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -325,7 +325,7 @@ class ELFObjectFile : public ObjectFile { protected: virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const; virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const; - virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const; + virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const; virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const; @@ -355,6 +355,8 @@ protected: RelocationRef &Res) const; virtual error_code getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const; + virtual error_code getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const; virtual error_code getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const; virtual error_code getRelocationType(DataRefImpl Rel, @@ -462,7 +464,7 @@ ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> - ::getSymbolOffset(DataRefImpl Symb, + ::getSymbolFileOffset(DataRefImpl Symb, uint64_t &Result) const { validateSymbol(Symb); const Elf_Sym *symb = getSymbol(Symb); @@ -486,7 +488,8 @@ error_code ELFObjectFile<target_endianness, is64Bits> case ELF::STT_FUNC: case ELF::STT_OBJECT: case ELF::STT_NOTYPE: - Result = symb->st_value; + Result = symb->st_value + + (Section ? Section->sh_offset - Section->sh_addr : 0); return object_error::success; default: Result = UnknownAddressOrSize; @@ -502,28 +505,25 @@ error_code ELFObjectFile<target_endianness, is64Bits> const Elf_Sym *symb = getSymbol(Symb); const Elf_Shdr *Section; switch (getSymbolTableIndex(symb)) { - case ELF::SHN_COMMON: // Fall through. + case ELF::SHN_COMMON: // Undefined symbols have no address yet. case ELF::SHN_UNDEF: Result = UnknownAddressOrSize; return object_error::success; case ELF::SHN_ABS: - Result = reinterpret_cast<uintptr_t>(base()+symb->st_value); + Result = symb->st_value; return object_error::success; default: Section = getSection(symb); } - const uint8_t* addr = base(); - if (Section) - addr += Section->sh_offset; + switch (symb->getType()) { case ELF::STT_SECTION: - Result = reinterpret_cast<uintptr_t>(addr); + Result = Section ? Section->sh_addr : UnknownAddressOrSize; return object_error::success; - case ELF::STT_FUNC: // Fall through. - case ELF::STT_OBJECT: // Fall through. 
+ case ELF::STT_FUNC: + case ELF::STT_OBJECT: case ELF::STT_NOTYPE: - addr += symb->st_value; - Result = reinterpret_cast<uintptr_t>(addr); + Result = symb->st_value; return object_error::success; default: Result = UnknownAddressOrSize; @@ -922,6 +922,29 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::getRelocationOffset(DataRefImpl Rel, + uint64_t &Result) const { + uint64_t offset; + const Elf_Shdr *sec = getSection(Rel.w.b); + switch (sec->sh_type) { + default : + report_fatal_error("Invalid section type in Rel!"); + case ELF::SHT_REL : { + offset = getRel(Rel)->r_offset; + break; + } + case ELF::SHT_RELA : { + offset = getRela(Rel)->r_offset; + break; + } + } + + Result = offset - sec->sh_addr; + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::getRelocationType(DataRefImpl Rel, uint64_t &Result) const { const Elf_Shdr *sec = getSection(Rel.w.b); diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt index 20fbb85..0041acd 100644 --- a/lib/Object/LLVMBuild.txt +++ b/lib/Object/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = Object parent = Libraries required_libraries = Core Support - diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 65ce5f8..4fa621b 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -125,23 +125,27 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl DRI, return object_error::success; } -error_code MachOObjectFile::getSymbolOffset(DataRefImpl DRI, - uint64_t &Result) const { - uint64_t SectionOffset; - uint8_t SectionIndex; +error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI, + uint64_t &Result) const { if (MachOObj->is64Bit()) { InMemoryStruct<macho::Symbol64TableEntry> Entry; getSymbol64TableEntry(DRI, Entry); Result = Entry->Value; - SectionIndex = Entry->SectionIndex; + if (Entry->SectionIndex) { + InMemoryStruct<macho::Section64> Section; + getSection64(Sections[Entry->SectionIndex-1], Section); + Result += Section->Offset - Section->Address; + } } else { InMemoryStruct<macho::SymbolTableEntry> Entry; getSymbolTableEntry(DRI, Entry); Result = Entry->Value; - SectionIndex = Entry->SectionIndex; + if (Entry->SectionIndex) { + InMemoryStruct<macho::Section> Section; + getSection(Sections[Entry->SectionIndex-1], Section); + Result += Section->Offset - Section->Address; + } } - getSectionAddress(Sections[SectionIndex-1], SectionOffset); - Result -= SectionOffset; return object_error::success; } @@ -162,7 +166,64 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI, error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, uint64_t &Result) const { - Result = UnknownAddressOrSize; + uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; + uint64_t BeginOffset; + uint64_t EndOffset = 0; + uint8_t SectionIndex; + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Symbol64TableEntry> Entry; + getSymbol64TableEntry(DRI, Entry); + BeginOffset = Entry->Value; + SectionIndex = Entry->SectionIndex; + if (!SectionIndex) { + Result = UnknownAddressOrSize; + return object_error::success; + } + // Unfortunately symbols are unsorted so we need to touch all + // symbols from load command + DRI.d.b = 0; + uint32_t Command = DRI.d.a; + while (Command == DRI.d.a) { + moveToNextSymbol(DRI); + if (DRI.d.a < LoadCommandCount) { + 
getSymbol64TableEntry(DRI, Entry); + if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset) + if (!EndOffset || Entry->Value < EndOffset) + EndOffset = Entry->Value; + } + DRI.d.b++; + } + } else { + InMemoryStruct<macho::SymbolTableEntry> Entry; + getSymbolTableEntry(DRI, Entry); + BeginOffset = Entry->Value; + SectionIndex = Entry->SectionIndex; + if (!SectionIndex) { + Result = UnknownAddressOrSize; + return object_error::success; + } + // Unfortunately symbols are unsorted so we need to touch all + // symbols from load command + DRI.d.b = 0; + uint32_t Command = DRI.d.a; + while (Command == DRI.d.a) { + moveToNextSymbol(DRI); + if (DRI.d.a < LoadCommandCount) { + getSymbolTableEntry(DRI, Entry); + if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset) + if (!EndOffset || Entry->Value < EndOffset) + EndOffset = Entry->Value; + } + DRI.d.b++; + } + } + if (!EndOffset) { + uint64_t Size; + getSectionSize(Sections[SectionIndex-1], Size); + getSectionAddress(Sections[SectionIndex-1], EndOffset); + EndOffset += Size; + } + Result = EndOffset - BeginOffset; return object_error::success; } @@ -275,7 +336,7 @@ error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb, if (index == 0) Res = end_sections(); else - Res = section_iterator(SectionRef(Sections[index], this)); + Res = section_iterator(SectionRef(Sections[index-1], this)); return object_error::success; } @@ -614,7 +675,7 @@ error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, bool isScattered = (Arch != Triple::x86_64) && (RE->Word0 & macho::RF_Scattered); uint64_t RelAddr = 0; - if (isScattered) + if (isScattered) RelAddr = RE->Word0 & 0xFFFFFF; else RelAddr = RE->Word0; @@ -622,6 +683,20 @@ error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr); return object_error::success; } +error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const { + InMemoryStruct<macho::RelocationEntry> RE; + getRelocation(Rel, RE); + + unsigned Arch = getArch(); + bool isScattered = (Arch != Triple::x86_64) && + (RE->Word0 & macho::RF_Scattered); + if (isScattered) + Res = RE->Word0 & 0xFFFFFF; + else + Res = RE->Word0; + return object_error::success; +} error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const { InMemoryStruct<macho::RelocationEntry> RE; diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 719bf88..f061ea7 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -150,9 +150,9 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { return ret; } -uint64_t LLVMGetSymbolOffset(LLVMSymbolIteratorRef SI) { +uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) { uint64_t ret; - if (error_code ec = (*unwrap(SI))->getOffset(ret)) + if (error_code ec = (*unwrap(SI))->getFileOffset(ret)) report_fatal_error(ec.message()); return ret; } @@ -172,6 +172,13 @@ uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) { return ret; } +uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { + uint64_t ret; + if (error_code ec = (*unwrap(RI))->getOffset(ret)) + report_fatal_error(ec.message()); + return ret; +} + LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) { SymbolRef ret; if (error_code ec = (*unwrap(RI))->getSymbol(ret)) diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f238894..70e7afd 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1854,20 +1854,33 @@ 
APFloat::convert(const fltSemantics &toSemantics, lostFraction lostFraction; unsigned int newPartCount, oldPartCount; opStatus fs; + int shift; + const fltSemantics &fromSemantics = *semantics; - assertArithmeticOK(*semantics); + assertArithmeticOK(fromSemantics); assertArithmeticOK(toSemantics); lostFraction = lfExactlyZero; newPartCount = partCountForBits(toSemantics.precision + 1); oldPartCount = partCount(); + shift = toSemantics.precision - fromSemantics.precision; + + bool X86SpecialNan = false; + if (&fromSemantics == &APFloat::x87DoubleExtended && + &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN && + (!(*significandParts() & 0x8000000000000000ULL) || + !(*significandParts() & 0x4000000000000000ULL))) { + // x86 has some unusual NaNs which cannot be represented in any other + // format; note them here. + X86SpecialNan = true; + } + + // If this is a truncation, perform the shift before we narrow the storage. + if (shift < 0 && (category==fcNormal || category==fcNaN)) + lostFraction = shiftRight(significandParts(), oldPartCount, -shift); - /* Handle storage complications. If our new form is wider, re-allocate our bit pattern into wider storage. If it is narrower, we ignore the excess parts, but if narrowing to a single part we need to free the old storage. Be careful not to reference significandParts for zeroes and infinities, since it aborts. */ + // Fix the storage so it can hold the new value. if (newPartCount > oldPartCount) { + // The new type requires more storage; make it available. integerPart *newParts; newParts = new integerPart[newPartCount]; APInt::tcSet(newParts, 0, newPartCount); @@ -1875,61 +1888,36 @@ APFloat::convert(const fltSemantics &toSemantics, APInt::tcAssign(newParts, significandParts(), oldPartCount); freeSignificand(); significand.parts = newParts; - } else if (newPartCount < oldPartCount) { - /* Capture any lost fraction through truncation of parts so we get - correct rounding whilst normalizing. */ - if (category==fcNormal) - lostFraction = lostFractionThroughTruncation - (significandParts(), oldPartCount, toSemantics.precision); - if (newPartCount == 1) { - integerPart newPart = 0; - if (category==fcNormal || category==fcNaN) - newPart = significandParts()[0]; - freeSignificand(); - significand.part = newPart; - } + } else if (newPartCount == 1 && oldPartCount != 1) { + // Switch to built-in storage for a single part. + integerPart newPart = 0; + if (category==fcNormal || category==fcNaN) + newPart = significandParts()[0]; + freeSignificand(); + significand.part = newPart; } + // Now that we have the right storage, switch the semantics. + semantics = &toSemantics; + + // If this is an extension, perform the shift now that the storage is + // available. + if (shift > 0 && (category==fcNormal || category==fcNaN)) + APInt::tcShiftLeft(significandParts(), newPartCount, shift); + if (category == fcNormal) { - /* Re-interpret our bit-pattern.
*/ - exponent += toSemantics.precision - semantics->precision; - semantics = &toSemantics; fs = normalize(rounding_mode, lostFraction); *losesInfo = (fs != opOK); } else if (category == fcNaN) { - int shift = toSemantics.precision - semantics->precision; - // Do this now so significandParts gets the right answer - const fltSemantics *oldSemantics = semantics; - semantics = &toSemantics; - *losesInfo = false; - // No normalization here, just truncate - if (shift>0) - APInt::tcShiftLeft(significandParts(), newPartCount, shift); - else if (shift < 0) { - unsigned ushift = -shift; - // Figure out if we are losing information. This happens - // if are shifting out something other than 0s, or if the x87 long - // double input did not have its integer bit set (pseudo-NaN), or if the - // x87 long double input did not have its QNan bit set (because the x87 - // hardware sets this bit when converting a lower-precision NaN to - // x87 long double). - if (APInt::tcLSB(significandParts(), newPartCount) < ushift) - *losesInfo = true; - if (oldSemantics == &APFloat::x87DoubleExtended && - (!(*significandParts() & 0x8000000000000000ULL) || - !(*significandParts() & 0x4000000000000000ULL))) - *losesInfo = true; - APInt::tcShiftRight(significandParts(), newPartCount, ushift); - } + *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan; // gcc forces the Quiet bit on, which means (float)(double)(float_sNan) // does not give you back the same bits. This is dubious, and we // don't currently do it. You're really supposed to get // an invalid operation signal at runtime, but nobody does that. fs = opOK; } else { - semantics = &toSemantics; - fs = opOK; *losesInfo = false; + fs = opOK; } return fs; diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 55cb433..506225f 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1440,15 +1440,11 @@ APInt APInt::sqrt() const { APInt nextSquare((x_old + 1) * (x_old +1)); if (this->ult(square)) return x_old; - else if (this->ule(nextSquare)) { - APInt midpoint((nextSquare - square).udiv(two)); - APInt offset(*this - square); - if (offset.ult(midpoint)) - return x_old; - else - return x_old + 1; - } else - llvm_unreachable("Error in APInt::sqrt computation"); + assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); + APInt midpoint((nextSquare - square).udiv(two)); + APInt offset(*this - square); + if (offset.ult(midpoint)) + return x_old; return x_old + 1; } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 4b43ae9..ce93449 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -57,6 +57,9 @@ TEMPLATE_INSTANTIATION(class opt<char>); TEMPLATE_INSTANTIATION(class opt<bool>); } } // end namespace llvm::cl +void GenericOptionValue::anchor() {} +void OptionValue<boolOrDefault>::anchor() {} +void OptionValue<std::string>::anchor() {} void Option::anchor() {} void basic_parser_impl::anchor() {} void parser<bool>::anchor() {} diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp index 8145664..1e89c6a 100644 --- a/lib/Support/DAGDeltaAlgorithm.cpp +++ b/lib/Support/DAGDeltaAlgorithm.cpp @@ -350,6 +350,9 @@ DAGDeltaAlgorithmImpl::Run() { return Required; } +void DAGDeltaAlgorithm::anchor() { +} + DAGDeltaAlgorithm::changeset_ty DAGDeltaAlgorithm::Run(const changeset_ty &Changes, const std::vector<edge_ty> &Dependencies) { diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index a19e4b4..86d1c5d 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp 
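The empty anchor() definitions added to CommandLine.cpp and DAGDeltaAlgorithm.cpp above are key functions: defining one out-of-line virtual member pins the class's vtable and type info to a single object file instead of emitting weak copies in every translation unit that uses the class. A minimal illustration (the class and file names are invented):

    // --- Option.h (illustrative) ---
    struct GenericOptionValue {
      virtual ~GenericOptionValue() {}
    private:
      virtual void anchor(); // declared but never called
    };

    // --- Option.cpp (illustrative) ---
    // The first non-inline virtual member: the vtable is emitted here, once.
    void GenericOptionValue::anchor() {}

    int main() {
      GenericOptionValue V; // links against the single pinned vtable
      (void)V;
    }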
@@ -298,6 +298,8 @@ std::string sys::getHostCPUName() { } case 16: return "amdfam10"; + case 21: + return "bdver1"; default: return "generic"; } diff --git a/lib/Support/LLVMBuild.txt b/lib/Support/LLVMBuild.txt index f32ef8f..5b88be0 100644 --- a/lib/Support/LLVMBuild.txt +++ b/lib/Support/LLVMBuild.txt @@ -19,4 +19,3 @@ type = Library name = Support parent = Libraries - diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index 8874e94..6a873cb 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -19,7 +19,7 @@ //=== independent code. //===----------------------------------------------------------------------===// -#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0 +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 // Define all methods as no-ops if threading is explicitly disabled namespace llvm { using namespace sys; diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index a4d49dc..dcddeda 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -90,7 +90,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) { case 0xCF: { uint16_t type = 0; if (magic[0] == char(0xFE) && magic[1] == char(0xED) && - magic[2] == char(0xFA) && + magic[2] == char(0xFA) && (magic[3] == char(0xCE) || magic[3] == char(0xCF))) { /* Native endian */ if (length >= 16) type = magic[14] << 8 | magic[15]; @@ -152,31 +152,31 @@ sys::IdentifyFileType(const char *magic, unsigned length) { bool Path::isArchive() const { - LLVMFileType type; + fs::file_magic type; if (fs::identify_magic(str(), type)) return false; - return type == Archive_FileType; + return type == fs::file_magic::archive; } bool Path::isDynamicLibrary() const { - LLVMFileType type; + fs::file_magic type; if (fs::identify_magic(str(), type)) return false; switch (type) { default: return false; - case Mach_O_FixedVirtualMemorySharedLib_FileType: - case Mach_O_DynamicallyLinkedSharedLib_FileType: - case Mach_O_DynamicallyLinkedSharedLibStub_FileType: - case ELF_SharedObject_FileType: - case COFF_FileType: return true; + case fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case fs::file_magic::macho_dynamically_linked_shared_lib: + case fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case fs::file_magic::elf_shared_object: + case fs::file_magic::pecoff_executable: return true; } } bool Path::isObjectFile() const { - LLVMFileType type; - if (fs::identify_magic(str(), type) || type == Unknown_FileType) + fs::file_magic type; + if (fs::identify_magic(str(), type) || type == fs::file_magic::unknown) return false; return true; } @@ -212,10 +212,10 @@ Path::appendSuffix(StringRef suffix) { bool Path::isBitcodeFile() const { - LLVMFileType type; + fs::file_magic type; if (fs::identify_magic(str(), type)) return false; - return type == Bitcode_FileType; + return type == fs::file_magic::bitcode; } bool Path::hasMagicNumber(StringRef Magic) const { diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index bebe442..7cc434b 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/PathV2.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include <cctype> #include <cstdio> @@ -492,7 +493,7 @@ bool is_separator(char value) { void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) { result.clear(); - + // Check whether the temporary directory is specified by an environment // variable. 
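The system_temp_directory() hunk that continues below only shows whitespace cleanups, but the surrounding logic is a classic probe-then-fallback. A minimal standalone sketch; the exact environment-variable list and defaults are platform-dependent and partly elided from this hunk, so treat the names here as assumptions:

#include <cstdlib>
#include <string>

// Probe common temp-directory variables, then fall back to a fixed default.
std::string tempDirectory() {
  const char *vars[] = {"TMPDIR", "TMP", "TEMP"};
  for (unsigned i = 0; i != 3; ++i)
    if (const char *dir = std::getenv(vars[i]))
      if (*dir)
        return dir;
  return "/tmp"; // Unix default; the Windows branch uses a different path
}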
const char *EnvironmentVariable; @@ -505,7 +506,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) { result.append(RequestedDir, RequestedDir + strlen(RequestedDir)); return; } - + // Fall back to a system default. const char *DefaultResult; #ifdef LLVM_ON_WIN32 @@ -519,7 +520,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) { #endif result.append(DefaultResult, DefaultResult + strlen(DefaultResult)); } - + bool has_root_name(const Twine &path) { SmallString<128> path_storage; StringRef p = path.toStringRef(path_storage); @@ -738,13 +739,124 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) { return success; } -error_code identify_magic(const Twine &path, LLVMFileType &result) { +/// @brief Identify the magic in magic. +file_magic identify_magic(StringRef magic) { + switch ((unsigned char)magic[0]) { + case 0xDE: // 0x0B17C0DE = BC wrapper + if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 && + magic[3] == (char)0x0B) + return file_magic::bitcode; + break; + case 'B': + if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE) + return file_magic::bitcode; + break; + case '!': + if (magic.size() >= 8) + if (memcmp(magic.data(),"!<arch>\n",8) == 0) + return file_magic::archive; + break; + + case '\177': + if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') { + if (magic.size() >= 18 && magic[17] == 0) + switch (magic[16]) { + default: break; + case 1: return file_magic::elf_relocatable; + case 2: return file_magic::elf_executable; + case 3: return file_magic::elf_shared_object; + case 4: return file_magic::elf_core; + } + } + break; + + case 0xCA: + if (magic[1] == char(0xFE) && magic[2] == char(0xBA) && + magic[3] == char(0xBE)) { + // This is complicated by an overlap with Java class files. + // See the Mach-O section in /usr/share/file/magic for details. + if (magic.size() >= 8 && magic[7] < 43) + // FIXME: Universal Binary of any type.
+ return file_magic::macho_dynamically_linked_shared_lib; + } + break; + + // The two magic numbers for mach-o are: + // 0xfeedface - 32-bit mach-o + // 0xfeedfacf - 64-bit mach-o + case 0xFE: + case 0xCE: + case 0xCF: { + uint16_t type = 0; + if (magic[0] == char(0xFE) && magic[1] == char(0xED) && + magic[2] == char(0xFA) && + (magic[3] == char(0xCE) || magic[3] == char(0xCF))) { + /* Native endian */ + if (magic.size() >= 16) type = magic[14] << 8 | magic[15]; + } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) && + magic[1] == char(0xFA) && magic[2] == char(0xED) && + magic[3] == char(0xFE)) { + /* Reverse endian */ + if (magic.size() >= 14) type = magic[13] << 8 | magic[12]; + } + switch (type) { + default: break; + case 1: return file_magic::macho_object; + case 2: return file_magic::macho_executable; + case 3: return file_magic::macho_fixed_virtual_memory_shared_lib; + case 4: return file_magic::macho_core; + case 5: return file_magic::macho_preload_executabl; + case 6: return file_magic::macho_dynamically_linked_shared_lib; + case 7: return file_magic::macho_dynamic_linker; + case 8: return file_magic::macho_bundle; + case 9: return file_magic::macho_dynamic_linker; + case 10: return file_magic::macho_dsym_companion; + } + break; + } + case 0xF0: // PowerPC Windows + case 0x83: // Alpha 32-bit + case 0x84: // Alpha 64-bit + case 0x66: // MPS R4000 Windows + case 0x50: // mc68K + case 0x4c: // 80386 Windows + if (magic[1] == 0x01) + return file_magic::coff_object; + + case 0x90: // PA-RISC Windows + case 0x68: // mc68K Windows + if (magic[1] == 0x02) + return file_magic::coff_object; + break; + + case 0x4d: // Possible MS-DOS stub on Windows PE file + if (magic[1] == 0x5a) { + uint32_t off = + *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c); + // PE/COFF file, either EXE or DLL. + if (off < magic.size() && memcmp(magic.data() + off, "PE\0\0",4) == 0) + return file_magic::pecoff_executable; + } + break; + + case 0x64: // x86-64 Windows. + if (magic[1] == char(0x86)) + return file_magic::coff_object; + break; + + default: + break; + } + return file_magic::unknown; +} + +error_code identify_magic(const Twine &path, file_magic &result) { SmallString<32> Magic; error_code ec = get_magic(path, Magic.capacity(), Magic); if (ec && ec != errc::value_too_large) return ec; - result = IdentifyFileType(Magic.data(), Magic.size()); + result = identify_magic(Magic); return success; } @@ -753,7 +865,9 @@ error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) { if (ft == file_type::directory_file) { // This code would be a lot better with exceptions ;/. error_code ec; - for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) { + directory_iterator i(path, ec); + if (ec) return ec; + for (directory_iterator e; i != e; i.increment(ec)) { if (ec) return ec; file_status st; if (error_code ec = i->status(st)) return ec; diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index 01860b0..75bc282 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -13,6 +13,7 @@ #include "llvm/Support/Program.h" #include "llvm/Config/config.h" +#include "llvm/Support/system_error.h" using namespace llvm; using namespace sys; diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp index d0b1e10..d14b976 100644 --- a/lib/Support/RWMutex.cpp +++ b/lib/Support/RWMutex.cpp @@ -20,7 +20,7 @@ //=== independent code. 
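The new content-based identify_magic(StringRef) above replaces the old path-based LLVMFileType lookup. A condensed standalone sketch covering only the bitcode, archive, and ELF branches (local enum names, not the fs::file_magic type; callers in the patch read up to 32 bytes of the file):

#include <cstring>
#include <string>

enum FileMagic { FM_unknown, FM_bitcode, FM_archive, FM_elf_relocatable,
                 FM_elf_executable, FM_elf_shared_object, FM_elf_core };

FileMagic identifyMagic(const std::string &magic) {
  if (magic.size() < 4)
    return FM_unknown;
  const unsigned char *b =
      reinterpret_cast<const unsigned char *>(magic.data());
  // Bitcode: raw 'BC\xC0\xDE', or the 0x0B17C0DE wrapper (stored
  // little-endian, so the leading byte on disk is 0xDE).
  if (b[0] == 'B' && b[1] == 'C' && b[2] == 0xC0 && b[3] == 0xDE)
    return FM_bitcode;
  if (b[0] == 0xDE && b[1] == 0xC0 && b[2] == 0x17 && b[3] == 0x0B)
    return FM_bitcode;
  // ar(1) archive.
  if (magic.size() >= 8 && std::memcmp(magic.data(), "!<arch>\n", 8) == 0)
    return FM_archive;
  // ELF: magic "\177ELF", then the 16-bit e_type field at offset 16; like the
  // patch, only trust the value when the high byte is zero.
  if (b[0] == 0x7F && b[1] == 'E' && b[2] == 'L' && b[3] == 'F' &&
      magic.size() >= 18 && b[17] == 0) {
    switch (b[16]) {
    case 1: return FM_elf_relocatable;
    case 2: return FM_elf_executable;
    case 3: return FM_elf_shared_object;
    case 4: return FM_elf_core;
    }
  }
  return FM_unknown;
}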
//===----------------------------------------------------------------------===// -#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0 +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 // Define all methods as no-ops if threading is explicitly disabled namespace llvm { using namespace sys; diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 04a44a0..d8a6ad3 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -73,9 +73,12 @@ void Statistic::RegisterStatistic() { if (Enabled) StatInfo->addStatistic(this); + TsanHappensBefore(this); sys::MemoryFence(); // Remember we have been registered. + TsanIgnoreWritesBegin(); Initialized = true; + TsanIgnoreWritesEnd(); } } diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp index fdb251c..08b12b6 100644 --- a/lib/Support/ThreadLocal.cpp +++ b/lib/Support/ThreadLocal.cpp @@ -19,7 +19,7 @@ //=== independent code. //===----------------------------------------------------------------------===// -#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0 +#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 // Define all methods as no-ops if threading is explicitly disabled namespace llvm { using namespace sys; diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 8f0bb93..7483225 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -24,7 +24,7 @@ static bool multithreaded_mode = false; static sys::Mutex* global_lock = 0; bool llvm::llvm_start_multithreaded() { -#if ENABLE_THREADS != 0 +#if LLVM_ENABLE_THREADS != 0 assert(!multithreaded_mode && "Already multithreaded!"); multithreaded_mode = true; global_lock = new sys::Mutex(true); @@ -39,7 +39,7 @@ bool llvm::llvm_start_multithreaded() { } void llvm::llvm_stop_multithreaded() { -#if ENABLE_THREADS != 0 +#if LLVM_ENABLE_THREADS != 0 assert(multithreaded_mode && "Not currently multithreaded!"); // We fence here to insure that all threaded operations are complete BEFORE we @@ -63,7 +63,7 @@ void llvm::llvm_release_global_lock() { if (multithreaded_mode) global_lock->release(); } -#if ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) +#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include <pthread.h> struct ThreadInfo { @@ -102,7 +102,7 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, error: ::pthread_attr_destroy(&Attr); } -#elif ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32) +#elif LLVM_ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32) #include "Windows/Windows.h" #include <process.h> diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index ac4f005..8f58e70 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -20,6 +20,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case arm: return "arm"; case cellspu: return "cellspu"; + case hexagon: return "hexagon"; case mips: return "mips"; case mipsel: return "mipsel"; case mips64: return "mips64"; @@ -59,6 +60,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case mblaze: return "mblaze"; + case hexagon: return "hexagon"; + case sparcv9: case sparc: return "sparc"; @@ -150,6 +153,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return ppc; if (Name == "mblaze") return mblaze; + if (Name == "hexagon") + return hexagon; if (Name == "sparc") return sparc; if (Name == "sparcv9") @@ -295,6 +300,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { return mips64; else if (ArchName == "mips64el") return mips64el; + else if (ArchName == "hexagon") + return hexagon; 
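The Statistic.cpp hunk above wraps the deliberately racy Initialized flag in TSan annotations (TsanHappensBefore, TsanIgnoreWritesBegin/End) so ThreadSanitizer does not report the benign race around the existing MemoryFence. In post-C++11 code the same publish/observe idiom is usually written with atomics instead of annotations; a standalone sketch, not LLVM's code:

#include <atomic>

struct LazyRegistration {
  std::atomic<bool> initialized;
  int payload;

  LazyRegistration() : initialized(false), payload(0) {}

  void registerOnce() {
    if (initialized.load(std::memory_order_acquire))
      return; // already published
    // ... one-time registration work; assumed externally serialized, as
    // RegisterStatistic is behind its own guard ...
    payload = 1;
    initialized.store(true, std::memory_order_release); // publish last
  }
};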
else if (ArchName == "sparc") return sparc; else if (ArchName == "sparcv9") diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index b5488de..aebb4ab 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -275,28 +275,17 @@ error_code exists(const Twine &path, bool &result) { return success; } -error_code equivalent(const Twine &A, const Twine &B, bool &result) { - // Get arguments. - SmallString<128> a_storage; - SmallString<128> b_storage; - StringRef a = A.toNullTerminatedStringRef(a_storage); - StringRef b = B.toNullTerminatedStringRef(b_storage); - - struct stat stat_a, stat_b; - int error_b = ::stat(b.begin(), &stat_b); - int error_a = ::stat(a.begin(), &stat_a); - - // If both are invalid, it's an error. If only one is, the result is false. - if (error_a != 0 || error_b != 0) { - if (error_a == error_b) - return error_code(errno, system_category()); - result = false; - } else { - result = - stat_a.st_dev == stat_b.st_dev && - stat_a.st_ino == stat_b.st_ino; - } +bool equivalent(file_status A, file_status B) { + assert(status_known(A) && status_known(B)); + return A.st_dev == B.st_dev && + A.st_ino == B.st_ino; +} +error_code equivalent(const Twine &A, const Twine &B, bool &result) { + file_status fsA, fsB; + if (error_code ec = status(A, fsA)) return ec; + if (error_code ec = status(B, fsB)) return ec; + result = equivalent(fsA, fsB); return success; } @@ -343,6 +332,9 @@ error_code status(const Twine &path, file_status &result) { else result = file_status(file_type::type_unknown); + result.st_dev = status.st_dev; + result.st_ino = status.st_ino; + return success; } @@ -441,7 +433,8 @@ rety_open_create: return success; } -error_code directory_iterator_construct(directory_iterator &it, StringRef path){ +error_code detail::directory_iterator_construct(detail::DirIterState &it, + StringRef path){ SmallString<128> path_null(path); DIR *directory = ::opendir(path_null.c_str()); if (directory == 0) @@ -454,7 +447,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){ return directory_iterator_increment(it); } -error_code directory_iterator_destruct(directory_iterator& it) { +error_code detail::directory_iterator_destruct(detail::DirIterState &it) { if (it.IterationHandle) ::closedir(reinterpret_cast<DIR *>(it.IterationHandle)); it.IterationHandle = 0; @@ -462,7 +455,7 @@ error_code directory_iterator_destruct(directory_iterator& it) { return success; } -error_code directory_iterator_increment(directory_iterator& it) { +error_code detail::directory_iterator_increment(detail::DirIterState &it) { errno = 0; dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle)); if (cur_dir == 0 && errno != 0) { diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 346baf1..e5990d0 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -412,19 +412,19 @@ Program::Kill(std::string* ErrMsg) { return false; } -bool Program::ChangeStdinToBinary(){ +error_code Program::ChangeStdinToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. - return false; + return make_error_code(errc::success); } -bool Program::ChangeStdoutToBinary(){ +error_code Program::ChangeStdoutToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. 
- return false; + return make_error_code(errc::success); } -bool Program::ChangeStderrToBinary(){ +error_code Program::ChangeStderrToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. - return false; + return make_error_code(errc::success); } } diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp index 078d705..2b250a3 100644 --- a/lib/Support/Valgrind.cpp +++ b/lib/Support/Valgrind.cpp @@ -53,6 +53,7 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { #endif // !HAVE_VALGRIND_VALGRIND_H +#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG) // These functions require no implementation, tsan just looks at the arguments // they're called with. extern "C" { @@ -63,3 +64,4 @@ void AnnotateHappensAfter(const char *file, int line, void AnnotateIgnoreWritesBegin(const char *file, int line) {} void AnnotateIgnoreWritesEnd(const char *file, int line) {} } +#endif diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index bc597b2..7ca33c0 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -17,7 +17,6 @@ //===----------------------------------------------------------------------===// #include "Windows.h" -#include <wincrypt.h> #include <fcntl.h> #include <io.h> #include <sys/stat.h> @@ -112,14 +111,6 @@ namespace { return success; } - // Forwarder for ScopedHandle. - BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) { - return ::CryptReleaseContext(Provider, 0); - } - - typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1), - BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext> - ScopedCryptContext; bool is_separator(const wchar_t value) { switch (value) { case L'\\': @@ -359,68 +350,22 @@ error_code exists(const Twine &path, bool &result) { return success; } -error_code equivalent(const Twine &A, const Twine &B, bool &result) { - // Get arguments. - SmallString<128> a_storage; - SmallString<128> b_storage; - StringRef a = A.toStringRef(a_storage); - StringRef b = B.toStringRef(b_storage); - - // Convert to utf-16. - SmallVector<wchar_t, 128> wide_a; - SmallVector<wchar_t, 128> wide_b; - if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec; - if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec; - - AutoHandle HandleB( - ::CreateFileW(wide_b.begin(), - 0, - FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, - 0, - OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, - 0)); - - AutoHandle HandleA( - ::CreateFileW(wide_a.begin(), - 0, - FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, - 0, - OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, - 0)); - - // If both handles are invalid, it's an error. - if (HandleA == INVALID_HANDLE_VALUE && - HandleB == INVALID_HANDLE_VALUE) - return windows_error(::GetLastError()); - - // If only one is invalid, it's false. - if (HandleA == INVALID_HANDLE_VALUE && - HandleB == INVALID_HANDLE_VALUE) { - result = false; - return success; - } - - // Get file information. - BY_HANDLE_FILE_INFORMATION InfoA, InfoB; - if (!::GetFileInformationByHandle(HandleA, &InfoA)) - return windows_error(::GetLastError()); - if (!::GetFileInformationByHandle(HandleB, &InfoB)) - return windows_error(::GetLastError()); - - // See if it's all the same. 
- result = - InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber && - InfoA.nFileIndexHigh == InfoB.nFileIndexHigh && - InfoA.nFileIndexLow == InfoB.nFileIndexLow && - InfoA.nFileSizeHigh == InfoB.nFileSizeHigh && - InfoA.nFileSizeLow == InfoB.nFileSizeLow && - InfoA.ftLastWriteTime.dwLowDateTime == - InfoB.ftLastWriteTime.dwLowDateTime && - InfoA.ftLastWriteTime.dwHighDateTime == - InfoB.ftLastWriteTime.dwHighDateTime; +bool equivalent(file_status A, file_status B) { + assert(status_known(A) && status_known(B)); + return A.FileIndexHigh == B.FileIndexHigh && + A.FileIndexLow == B.FileIndexLow && + A.FileSizeHigh == B.FileSizeHigh && + A.FileSizeLow == B.FileSizeLow && + A.LastWriteTimeHigh == B.LastWriteTimeHigh && + A.LastWriteTimeLow == B.LastWriteTimeLow && + A.VolumeSerialNumber == B.VolumeSerialNumber; +} +error_code equivalent(const Twine &A, const Twine &B, bool &result) { + file_status fsA, fsB; + if (error_code ec = status(A, fsA)) return ec; + if (error_code ec = status(B, fsB)) return ec; + result = equivalent(fsA, fsB); return success; } @@ -478,8 +423,7 @@ error_code status(const Twine &path, file_status &result) { return success; } - if (error_code ec = UTF8ToUTF16(path8, - path_utf16)) + if (error_code ec = UTF8ToUTF16(path8, path_utf16)) return ec; DWORD attr = ::GetFileAttributesW(path_utf16.begin()); @@ -488,7 +432,7 @@ error_code status(const Twine &path, file_status &result) { // Handle reparse points. if (attr & FILE_ATTRIBUTE_REPARSE_POINT) { - AutoHandle h( + ScopedFileHandle h( ::CreateFileW(path_utf16.begin(), 0, // Attributes only. FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, @@ -496,14 +440,35 @@ error_code status(const Twine &path, file_status &result) { OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0)); - if (h == INVALID_HANDLE_VALUE) + if (!h) goto handle_status_error; } if (attr & FILE_ATTRIBUTE_DIRECTORY) result = file_status(file_type::directory_file); - else + else { result = file_status(file_type::regular_file); + ScopedFileHandle h( + ::CreateFileW(path_utf16.begin(), + 0, // Attributes only. + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, + 0)); + if (!h) + goto handle_status_error; + BY_HANDLE_FILE_INFORMATION Info; + if (!::GetFileInformationByHandle(h, &Info)) + goto handle_status_error; + result.FileIndexHigh = Info.nFileIndexHigh; + result.FileIndexLow = Info.nFileIndexLow; + result.FileSizeHigh = Info.nFileSizeHigh; + result.FileSizeLow = Info.nFileSizeLow; + result.LastWriteTimeHigh = Info.ftLastWriteTime.dwHighDateTime; + result.LastWriteTimeLow = Info.ftLastWriteTime.dwLowDateTime; + result.VolumeSerialNumber = Info.dwVolumeSerialNumber; + } return success; @@ -535,7 +500,7 @@ error_code unique_file(const Twine &model, int &result_fd, if (makeAbsolute) { // Make model absolute by prepending a temp directory if it's not already. 
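On Windows the same equivalence test needs an opened handle: the patch caches BY_HANDLE_FILE_INFORMATION fields (volume serial number, file index, size, last-write time) into file_status, and equivalent() becomes a field-wise comparison. A Windows-only standalone sketch of the identity part, with error handling reduced to returning false:

#include <windows.h>

static bool getFileId(const wchar_t *path, BY_HANDLE_FILE_INFORMATION &info) {
  HANDLE h = ::CreateFileW(path, 0, // attributes only
                           FILE_SHARE_DELETE | FILE_SHARE_READ |
                           FILE_SHARE_WRITE, 0, OPEN_EXISTING,
                           FILE_FLAG_BACKUP_SEMANTICS, 0);
  if (h == INVALID_HANDLE_VALUE)
    return false;
  BOOL ok = ::GetFileInformationByHandle(h, &info);
  ::CloseHandle(h);
  return ok != 0;
}

bool sameFile(const wchar_t *a, const wchar_t *b) {
  BY_HANDLE_FILE_INFORMATION ia, ib;
  if (!getFileId(a, ia) || !getFileId(b, ib))
    return false;
  return ia.dwVolumeSerialNumber == ib.dwVolumeSerialNumber &&
         ia.nFileIndexHigh == ib.nFileIndexHigh &&
         ia.nFileIndexLow == ib.nFileIndexLow;
}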
bool absolute = path::is_absolute(m); - + if (!absolute) { SmallVector<wchar_t, 64> temp_dir; if (error_code ec = TempDir(temp_dir)) return ec; @@ -691,7 +656,8 @@ error_code get_magic(const Twine &path, uint32_t len, return success; } -error_code directory_iterator_construct(directory_iterator &it, StringRef path){ +error_code detail::directory_iterator_construct(detail::DirIterState &it, + StringRef path){ SmallVector<wchar_t, 128> path_utf16; if (error_code ec = UTF8ToUTF16(path, @@ -722,7 +688,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){ error_code ec = windows_error(::GetLastError()); // Check for end. if (ec == windows_error::no_more_files) - return directory_iterator_destruct(it); + return detail::directory_iterator_destruct(it); return ec; } else FilenameLen = ::wcslen(FirstFind.cFileName); @@ -742,7 +708,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){ return success; } -error_code directory_iterator_destruct(directory_iterator& it) { +error_code detail::directory_iterator_destruct(detail::DirIterState &it) { if (it.IterationHandle != 0) // Closes the handle if it's valid. ScopedFindHandle close(HANDLE(it.IterationHandle)); @@ -751,13 +717,13 @@ error_code directory_iterator_destruct(directory_iterator& it) { return success; } -error_code directory_iterator_increment(directory_iterator& it) { +error_code detail::directory_iterator_increment(detail::DirIterState &it) { WIN32_FIND_DATAW FindData; if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) { error_code ec = windows_error(::GetLastError()); // Check for end. if (ec == windows_error::no_more_files) - return directory_iterator_destruct(it); + return detail::directory_iterator_destruct(it); return ec; } diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index 7e38168..80ccaa6 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -299,14 +299,14 @@ Program::Execute(const Path& path, Data_ = wpi; // Make sure these get closed no matter what. - AutoHandle hThread(pi.hThread); + ScopedCommonHandle hThread(pi.hThread); // Assign the process to a job if a memory limit is defined. - AutoHandle hJob(0); + ScopedJobHandle hJob; if (memoryLimit != 0) { hJob = CreateJobObject(0, 0); bool success = false; - if (hJob != 0) { + if (hJob) { JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli; memset(&jeli, 0, sizeof(jeli)); jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY; @@ -367,7 +367,17 @@ Program::Wait(const Path &path, return -2; } - return status & 0377; + if (!status) + return 0; + + // Pass 10(Warning) and 11(Error) to the callee as negative value. 
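Program.inc above already uses ScopedCommonHandle and ScopedJobHandle; the machinery arrives in the Windows.h hunk below, where a single ScopedHandle<Traits> template replaces both AutoHandle and the old function-pointer-parameterized ScopedHandle. The idea generalizes: a traits class supplies the handle type, its invalid value, and the close call. The same shape with portable handle types (a sketch of the pattern, not the patch's code):

#include <cstdio>
#include <unistd.h>

template <typename Traits>
class Scoped {
  typename Traits::handle_type H;
  Scoped(const Scoped &);         // = delete;
  void operator=(const Scoped &); // = delete;
public:
  Scoped() : H(Traits::invalid()) {}
  explicit Scoped(typename Traits::handle_type h) : H(h) {}
  ~Scoped() { if (Traits::valid(H)) Traits::close(H); }
  operator typename Traits::handle_type() const { return H; }
  operator bool() const { return Traits::valid(H); }
};

struct FdTraits { // POSIX file descriptors
  typedef int handle_type;
  static int invalid() { return -1; }
  static bool valid(int h) { return h != -1; }
  static void close(int h) { ::close(h); }
};

struct FileTraits { // C stdio streams
  typedef std::FILE *handle_type;
  static std::FILE *invalid() { return 0; }
  static bool valid(std::FILE *h) { return h != 0; }
  static void close(std::FILE *h) { std::fclose(h); }
};

typedef Scoped<FdTraits> ScopedFd;
typedef Scoped<FileTraits> ScopedFile;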
+ if ((status & 0xBFFF0000U) == 0x80000000U) + return (int)status; + + if (status & 0xFF) + return status & 0x7FFFFFFF; + + return 1; } bool @@ -387,19 +397,25 @@ Program::Kill(std::string* ErrMsg) { return false; } -bool Program::ChangeStdinToBinary(){ +error_code Program::ChangeStdinToBinary(){ int result = _setmode( _fileno(stdin), _O_BINARY ); - return result == -1; + if (result == -1) + return error_code(errno, generic_category()); + return make_error_code(errc::success); } -bool Program::ChangeStdoutToBinary(){ +error_code Program::ChangeStdoutToBinary(){ int result = _setmode( _fileno(stdout), _O_BINARY ); - return result == -1; + if (result == -1) + return error_code(errno, generic_category()); + return make_error_code(errc::success); } -bool Program::ChangeStderrToBinary(){ +error_code Program::ChangeStderrToBinary(){ int result = _setmode( _fileno(stderr), _O_BINARY ); - return result == -1; + if (result == -1) + return error_code(errno, generic_category()); + return make_error_code(errc::success); } } diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index 0d4b8a2..3a7e90b 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -446,7 +446,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { } if (ExitOnUnhandledExceptions) - _exit(-3); + _exit(ep->ExceptionRecord->ExceptionCode); // Allow dialog box to pop up allowing choice to start debugger. if (OldFilter) diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h index 67b6f01..5c1da0d 100644 --- a/lib/Support/Windows/Windows.h +++ b/lib/Support/Windows/Windows.h @@ -26,6 +26,7 @@ #include "llvm/Config/config.h" // Get build system configuration settings #include <windows.h> +#include <wincrypt.h> #include <shlobj.h> #include <cassert> #include <string> @@ -41,70 +42,99 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { return true; } -class AutoHandle { - HANDLE handle; +template <typename HandleTraits> +class ScopedHandle { + typedef typename HandleTraits::handle_type handle_type; + handle_type Handle; + ScopedHandle(const ScopedHandle &other); // = delete; + void operator=(const ScopedHandle &other); // = delete; public: - AutoHandle(HANDLE h) : handle(h) {} + ScopedHandle() + : Handle(HandleTraits::GetInvalid()) {} + + explicit ScopedHandle(handle_type h) + : Handle(h) {} - ~AutoHandle() { - if (handle) - CloseHandle(handle); + ~ScopedHandle() { + if (HandleTraits::IsValid(Handle)) + HandleTraits::Close(Handle); } - operator HANDLE() { - return handle; + handle_type take() { + handle_type t = Handle; + Handle = HandleTraits::GetInvalid(); + return t; } - AutoHandle &operator=(HANDLE h) { - handle = h; + ScopedHandle &operator=(handle_type h) { + if (HandleTraits::IsValid(Handle)) + HandleTraits::Close(Handle); + Handle = h; return *this; } + + // True if Handle is valid. + operator bool() const { + return HandleTraits::IsValid(Handle) ? 
true : false; + } + + operator handle_type() const { + return Handle; + } }; -template <class HandleType, uintptr_t InvalidHandle, - class DeleterType, DeleterType D> -class ScopedHandle { - HandleType Handle; +struct CommonHandleTraits { + typedef HANDLE handle_type; -public: - ScopedHandle() : Handle(InvalidHandle) {} - ScopedHandle(HandleType handle) : Handle(handle) {} + static handle_type GetInvalid() { + return INVALID_HANDLE_VALUE; + } - ~ScopedHandle() { - if (Handle != HandleType(InvalidHandle)) - D(Handle); + static void Close(handle_type h) { + ::CloseHandle(h); } - HandleType take() { - HandleType temp = Handle; - Handle = HandleType(InvalidHandle); - return temp; + static bool IsValid(handle_type h) { + return h != GetInvalid(); } +}; - operator HandleType() const { return Handle; } +struct JobHandleTraits : CommonHandleTraits { + static handle_type GetInvalid() { + return NULL; + } +}; - ScopedHandle &operator=(HandleType handle) { - Handle = handle; - return *this; +struct CryptContextTraits : CommonHandleTraits { + typedef HCRYPTPROV handle_type; + + static handle_type GetInvalid() { + return 0; } - typedef void (*unspecified_bool_type)(); - static void unspecified_bool_true() {} + static void Close(handle_type h) { + ::CryptReleaseContext(h, 0); + } - // True if Handle is valid. - operator unspecified_bool_type() const { - return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true; + static bool IsValid(handle_type h) { + return h != GetInvalid(); } +}; - bool operator!() const { - return Handle == HandleType(InvalidHandle); +struct FindHandleTraits : CommonHandleTraits { + static void Close(handle_type h) { + ::FindClose(h); } }; -typedef ScopedHandle<HANDLE, uintptr_t(-1), - BOOL (WINAPI*)(HANDLE), ::FindClose> - ScopedFindHandle; +struct FileHandleTraits : CommonHandleTraits {}; + +typedef ScopedHandle<CommonHandleTraits> ScopedCommonHandle; +typedef ScopedHandle<FileHandleTraits> ScopedFileHandle; +typedef ScopedHandle<CryptContextTraits> ScopedCryptContext; +typedef ScopedHandle<FindHandleTraits> ScopedFindHandle; +typedef ScopedHandle<JobHandleTraits> ScopedJobHandle; namespace llvm { template <class T> diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 4927e9a..72d3986 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -20,6 +20,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/system_error.h" #include "llvm/ADT/STLExtras.h" #include <cctype> #include <cerrno> diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 0db4134..e678087 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -10,7 +10,3 @@ add_llvm_library(LLVMTableGen TGLexer.cpp TGParser.cpp ) - -add_llvm_library_dependencies(LLVMTableGen - LLVMSupport - ) diff --git a/lib/TableGen/LLVMBuild.txt b/lib/TableGen/LLVMBuild.txt index 4e24c37..54cedfd 100644 --- a/lib/TableGen/LLVMBuild.txt +++ b/lib/TableGen/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = TableGen parent = Libraries required_libraries = Support - diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index c06add4..8bcb029 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -2219,6 +2219,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { Record *DefProto = MC->DefPrototypes[i]; Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, DefmPrefixLoc); + if (!CurRec) + return true; if 
(ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc, TArgs, TemplateVals, true/*Delete args*/)) diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index bbca228..6ae287a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -493,11 +493,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; } - // These modifiers are not yet supported. - case 'p': // The high single-precision register of a VFP double-precision - // register. case 'e': // The low doubleword register of a NEON quad register. - case 'f': // The high doubleword register of a NEON quad register. + case 'f': { // The high doubleword register of a NEON quad register. + if (!MI->getOperand(OpNum).isReg()) + return true; + unsigned Reg = MI->getOperand(OpNum).getReg(); + if (!ARM::QPRRegClass.contains(Reg)) + return true; + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? + ARM::dsub_0 : ARM::dsub_1); + O << ARMInstPrinter::getRegisterName(SubReg); + return false; + } + + // These modifiers are not yet supported. case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1. case 'H': // The highest-numbered register of a pair. return true; @@ -739,14 +749,14 @@ void ARMAsmPrinter::emitAttributes() { } // Signal various FP modes. - if (!UnsafeFPMath) { + if (!TM.Options.UnsafeFPMath) { AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed); } - if (NoInfsFPMath && NoNaNsFPMath) + if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath) AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::Allowed); else @@ -759,7 +769,7 @@ void ARMAsmPrinter::emitAttributes() { AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1); // Hard float. Use both S and D registers and conform to AAPCS-VFP. - if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) { + if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) { AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3); AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1); } @@ -1069,7 +1079,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } // Try to figure out the unwinding opcode out of src / dst regs. - if (MI->getDesc().mayStore()) { + if (MI->mayStore()) { // Register saves. assert(DstReg == ARM::SP && "Only stack pointer as a destination reg is supported"); @@ -1481,11 +1491,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { /// in the function. The first operand is the ID# for this instruction, the /// second is the index into the MachineConstantPool that this is, the third /// is the size in bytes of this constant pool entry. + /// The required alignment is specified on the basic block holding this MI. unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); - EmitAlignment(2); - // Mark the constant pool entry as data if we're not already in a data // region. 
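The PrintAsmOperand change above implements the GCC inline-asm operand modifiers 'e' and 'f', which print the low and high doubleword (D) register of a NEON quad (Q) operand. What that enables at the source level, sketched for an ARM target with NEON (assumes a GCC-compatible compiler; purely illustrative):

#include <arm_neon.h>

// Copy the low D half of a Q register over its high half. %e0 and %f0 expand
// to the dN / d(N+1) halves of the Q register backing operand 0.
float32x4_t duplicateLowHalf(float32x4_t q) {
  asm("vmov %f0, %e0" : "+w"(q));
  return q;
}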
OutStreamer.EmitDataRegion(); @@ -1934,4 +1943,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() { RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget); RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget); } - diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9315348..8bf5475 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - bool isLoad = !MCID.mayStore(); + bool isLoad = !MI->mayStore(); const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); const MachineOperand &Base = MI->getOperand(2); const MachineOperand &Offset = MI->getOperand(NumOps-3); @@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } +bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { + if (MI->isBundle()) { + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + int PIdx = I->findFirstPredOperandIdx(); + if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL) + return true; + } + return false; + } + + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; +} + bool ARMBaseInstrInfo:: PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const { @@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { // FIXME: This confuses implicit_def with optional CPSR def. const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef()) + if (!MCID.getImplicitDefs() && !MI->hasOptionalDef()) return false; bool Found = false; @@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, /// By default, this returns true for every instruction with a /// PredicateOperand. bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return false; - if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); return AFI->isThumb2Function(); @@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); if (MI->isLabel()) return 0; - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI->getOpcode(); switch (Opc) { case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: @@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::EH_LABEL: case TargetOpcode::DBG_VALUE: return 0; + case TargetOpcode::BUNDLE: + return getInstBundleLength(MI); case ARM::MOVi16_ga_pcrel: case ARM::MOVTi16_ga_pcrel: case ARM::t2MOVi16_ga_pcrel: @@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4); unsigned NumOps = MCID.getNumOperands(); MachineOperand JTOP = - MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2)); + MI->getOperand(NumOps - (MI->isPredicable() ? 
3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); assert(MJTI != 0); @@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 0; // Not reached } +unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += GetInstSizeInBytes(&*I); + } + return Size; +} + void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); + return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); } void ARMBaseInstrInfo:: @@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const { const MachineMemOperand *Dummy; - return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); + return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ @@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) + if (MI->isTerminator() || MI->isLabel()) return true; // Treat the start of the IT block as a scheduling boundary, but schedule @@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Check that CPSR isn't set between the comparison instruction and the one we // want to change. - MachineBasicBlock::const_iterator I = CmpInstr, E = MI, - B = MI->getParent()->begin(); + MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin(); // Early exit if CmpInstr is at the beginning of the BB. 
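Several hunks above (isPredicated, GetInstSizeInBytes, getInstBundleLength) follow one pattern: a BUNDLE pseudo-instruction is a header whose members follow it flagged as being inside the bundle, and per-instruction queries fold over those members. A toy standalone model of the two folds (names are illustrative, not LLVM's MachineInstr API):

#include <vector>

struct Insn {
  bool isBundleHeader;
  bool insideBundle;
  bool predicated; // predicate other than "always"
  unsigned size;   // bytes
};

// A bundle is predicated if any member carries a real predicate.
bool bundleIsPredicated(const std::vector<Insn> &mbb, size_t i) {
  if (!mbb[i].isBundleHeader)
    return mbb[i].predicated;
  for (size_t j = i + 1; j < mbb.size() && mbb[j].insideBundle; ++j)
    if (mbb[j].predicated)
      return true;
  return false;
}

// A bundle's size is the sum of its members' sizes.
unsigned bundleSize(const std::vector<Insn> &mbb, size_t i) {
  unsigned total = 0;
  for (size_t j = i + 1; j < mbb.size() && mbb[j].insideBundle; ++j)
    total += mbb[j].size;
  return total;
}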
if (I == B) return false; @@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, bool isKill = UseMI->getOperand(OpIdx).isKill(); unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), - *UseMI, UseMI->getDebugLoc(), + UseMI, UseMI->getDebugLoc(), get(NewUseOpc), NewReg) .addReg(Reg1, getKillRegState(isKill)) .addImm(SOImmValV1))); @@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return UseCycle; } +static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, + const MachineInstr *MI, unsigned Reg, + unsigned &DefIdx, unsigned &Dist) { + Dist = 0; + + MachineBasicBlock::const_iterator I = MI; ++I; + MachineBasicBlock::const_instr_iterator II = + llvm::prior(I.getInstrIterator()); + assert(II->isInsideBundle() && "Empty bundle?"); + + int Idx = -1; + while (II->isInsideBundle()) { + Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI); + if (Idx != -1) + break; + --II; + ++Dist; + } + + assert(Idx != -1 && "Cannot find bundled definition!"); + DefIdx = Idx; + return II; +} + +static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, + const MachineInstr *MI, unsigned Reg, + unsigned &UseIdx, unsigned &Dist) { + Dist = 0; + + MachineBasicBlock::const_instr_iterator II = MI; ++II; + assert(II->isInsideBundle() && "Empty bundle?"); + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + + // FIXME: This doesn't properly handle multiple uses. + int Idx = -1; + while (II != E && II->isInsideBundle()) { + Idx = II->findRegisterUseOperandIdx(Reg, false, TRI); + if (Idx != -1) + break; + if (II->getOpcode() != ARM::t2IT) + ++Dist; + ++II; + } + + if (Idx == -1) { + Dist = 0; + return 0; + } + + UseIdx = Idx; + return II; +} + int ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, @@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, DefMI->isRegSequence() || DefMI->isImplicitDef()) return 1; - const MCInstrDesc &DefMCID = DefMI->getDesc(); if (!ItinData || ItinData->isEmpty()) - return DefMCID.mayLoad() ? 3 : 1; + return DefMI->mayLoad() ? 3 : 1; - const MCInstrDesc &UseMCID = UseMI->getDesc(); + const MCInstrDesc *DefMCID = &DefMI->getDesc(); + const MCInstrDesc *UseMCID = &UseMI->getDesc(); const MachineOperand &DefMO = DefMI->getOperand(DefIdx); - if (DefMO.getReg() == ARM::CPSR) { + unsigned Reg = DefMO.getReg(); + if (Reg == ARM::CPSR) { if (DefMI->getOpcode() == ARM::FMSTAT) { // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) return Subtarget.isCortexA9() ? 1 : 20; } // CPSR set and branch can be paired in the same cycle. - if (UseMCID.isBranch()) + if (UseMI->isBranch()) return 0; + + // Otherwise it takes the instruction latency (generally one). + int Latency = getInstrLatency(ItinData, DefMI); + + // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to + // its uses. Instructions which are otherwise scheduled between them may + // incur a code size penalty (not able to use the CPSR setting 16-bit + // instructions). + if (Latency > 0 && Subtarget.isThumb2()) { + const MachineFunction *MF = DefMI->getParent()->getParent(); + if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + --Latency; + } + return Latency; } unsigned DefAlign = DefMI->hasOneMemOperand() ? 
(*DefMI->memoperands_begin())->getAlignment() : 0; unsigned UseAlign = UseMI->hasOneMemOperand() ? (*UseMI->memoperands_begin())->getAlignment() : 0; - int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, - UseMCID, UseIdx, UseAlign); + + unsigned DefAdj = 0; + if (DefMI->isBundle()) { + DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj); + if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || + DefMI->isRegSequence() || DefMI->isImplicitDef()) + return 1; + DefMCID = &DefMI->getDesc(); + } + unsigned UseAdj = 0; + if (UseMI->isBundle()) { + unsigned NewUseIdx; + const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI, + Reg, NewUseIdx, UseAdj); + if (NewUseMI) { + UseMI = NewUseMI; + UseIdx = NewUseIdx; + UseMCID = &UseMI->getDesc(); + } + } + + int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign, + *UseMCID, UseIdx, UseAlign); + int Adj = DefAdj + UseAdj; + if (Adj) { + Latency -= (int)(DefAdj + UseAdj); + if (Latency < 1) + return 1; + } if (Latency > 1 && (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] // variants are one cycle cheaper. - switch (DefMCID.getOpcode()) { + switch (DefMCID->getOpcode()) { default: break; case ARM::LDRrs: case ARM::LDRBrs: { @@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } if (DefAlign < 8 && Subtarget.isCortexA9()) - switch (DefMCID.getOpcode()) { + switch (DefMCID->getOpcode()) { default: break; case ARM::VLD1q8: case ARM::VLD1q16: @@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2q8: case ARM::VLD2q16: case ARM::VLD2q32: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1DUPq8: case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: - case ARM::VLD1DUPq8_UPD: - case ARM::VLD1DUPq16_UPD: - case ARM::VLD1DUPq32_UPD: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPq8wb_register: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_register: case ARM::VLD2DUPd8: case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: @@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8Pseudo_UPD: - case ARM::VLD2d16Pseudo_UPD: - case ARM::VLD2d32Pseudo_UPD: - case ARM::VLD2q8Pseudo_UPD: - case ARM::VLD2q16Pseudo_UPD: - case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD2d8PseudoWB_fixed: + case ARM::VLD2d16PseudoWB_fixed: + case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2q8PseudoWB_fixed: + case ARM::VLD2q16PseudoWB_fixed: + case ARM::VLD2q32PseudoWB_fixed: + case ARM::VLD2d8PseudoWB_register: + case ARM::VLD2d16PseudoWB_register: + case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2q8PseudoWB_register: + case 
ARM::VLD2q16PseudoWB_register: + case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: @@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1DUPq8Pseudo: case ARM::VLD1DUPq16Pseudo: case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8Pseudo_UPD: - case ARM::VLD1DUPq16Pseudo_UPD: - case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD1DUPq8PseudoWB_fixed: + case ARM::VLD1DUPq16PseudoWB_fixed: + case ARM::VLD1DUPq32PseudoWB_fixed: + case ARM::VLD1DUPq8PseudoWB_register: + case ARM::VLD1DUPq16PseudoWB_register: + case ARM::VLD1DUPq32PseudoWB_register: case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: @@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned +ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const { + unsigned Reg = DefMI->getOperand(DefIdx).getReg(); + if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI)) + return 1; + + // If the second MI is predicated, then there is an implicit use dependency. + return getOperandLatency(ItinData, DefMI, DefIdx, DepMI, + DepMI->getNumOperands()); +} + int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, if (!ItinData || ItinData->isEmpty()) return 1; + if (MI->isBundle()) { + int Latency = 0; + MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + if (I->getOpcode() != ARM::t2IT) + Latency += getInstrLatency(ItinData, I, PredCost); + } + return Latency; + } + const MCInstrDesc &MCID = MI->getDesc(); unsigned Class = MCID.getSchedClass(); unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0f9f321..68e8208 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -69,10 +69,7 @@ public: bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; // Predication support. - bool isPredicated(const MachineInstr *MI) const { - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; - } + bool isPredicated(const MachineInstr *MI) const; ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { int PIdx = MI->findFirstPredOperandIdx(); @@ -213,12 +210,18 @@ public: SDNode *DefNode, unsigned DefIdx, SDNode *UseNode, unsigned UseIdx) const; + virtual unsigned getOutputLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *DepMI) const; + /// VFP/NEON execution domains. 
std::pair<uint16_t, uint16_t> getExecutionDomain(const MachineInstr *MI) const; void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; private: + unsigned getInstBundleLength(const MachineInstr *MI) const; + int getVLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, unsigned DefClass, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 7c42342..8ee6ce2 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -631,7 +631,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - return (RealignStack && !AFI->isThumb1OnlyFunction() && + return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() && (!MFI->hasVarSizedObjects() || EnableBasePointer)); } @@ -649,7 +649,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (DisableFramePointerElim(MF) && MFI->adjustsStack()) + if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index d74ccfa..365f0bb 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -401,7 +401,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) emitInstruction(*I); } diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 3e3a413..2039d41 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -26,6 +26,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -51,6 +52,43 @@ static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); +static cl::opt<bool> +AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true), + cl::desc("Align constant islands in code")); + +/// UnknownPadding - Return the worst case padding that could result from +/// unknown offset bits. This does not include alignment padding caused by +/// known offset bits. +/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. +static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact, +/// add padding such that: +/// +/// 1. The result is aligned to 1 << LogAlign. +/// +/// 2. No other value of the unknown bits would require more padding. 
+/// +/// This may add more padding than is required to satisfy just one of the +/// constraints. It is necessary to compute alignment this way to guarantee +/// that we don't underestimate the padding before an aligned block. If the +/// real padding before a block is larger than we think, constant pool entries +/// may go out of range. +static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign, + unsigned KnownBits) { + // Add the worst possible padding that the unknown bits could cause. + Offset += UnknownPadding(LogAlign, KnownBits); + + // Then align the result. + return RoundUpToAlignment(Offset, 1u << LogAlign); +} + namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM /// requires constant pool entries to be scattered among the instructions @@ -64,16 +102,70 @@ namespace { /// CPE - A constant pool entry that has been placed somewhere, which /// tracks a list of users. class ARMConstantIslands : public MachineFunctionPass { - /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed - /// by MBB Number. The two-byte pads required for Thumb alignment are - /// counted as part of the following block (i.e., the offset and size for - /// a padded block will both be ==2 mod 4). - std::vector<unsigned> BBSizes; + /// BasicBlockInfo - Information about the offset and size of a single + /// basic block. + struct BasicBlockInfo { + /// Offset - Distance from the beginning of the function to the beginning + /// of this basic block. + /// + /// The offset is always aligned as required by the basic block. + unsigned Offset; + + /// Size - Size of the basic block in bytes. If the block contains + /// inline assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// KnownBits - The number of low bits in Offset that are known to be + /// exact. The remaining bits of Offset are an upper bound. + uint8_t KnownBits; + + /// Unalign - When non-zero, the block contains instructions (inline asm) + /// of unknown size. The real size may be smaller than Size bytes by a + /// multiple of 1 << Unalign. + uint8_t Unalign; + + /// PostAlign - When non-zero, the block terminator contains a .align + /// directive, so the end of the block is aligned to 1 << PostAlign + /// bytes. + uint8_t PostAlign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0), + PostAlign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + return Unalign ? Unalign : KnownBits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + unsigned LA = std::max(unsigned(PostAlign), LogAlign); + if (!LA) + return PO; + // Add alignment padding from the terminator. + return WorstCaseAlign(PO, LA, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of know bits. + /// If LogAlign is given, also consider the alignment of the next block. 
+ unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(std::max(unsigned(PostAlign), LogAlign), + internalKnownBits()); + } + }; - /// BBOffsets - the offset of each MBB in bytes, starting from 0. - /// The two-byte pads required for Thumb alignment are counted as part of - /// the following block. - std::vector<unsigned> BBOffsets; + std::vector<BasicBlockInfo> BBInfo; /// WaterList - A sorted list of basic blocks where islands could be placed /// (i.e. blocks that don't fall through to the following block, due @@ -162,9 +254,8 @@ namespace { /// the branch fix up pass. bool HasFarJump; - /// HasInlineAsm - True if the function contains inline assembly. - bool HasInlineAsm; - + MachineFunction *MF; + MachineConstantPool *MCP; const ARMInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; @@ -182,67 +273,65 @@ namespace { } private: - void DoInitialPlacement(MachineFunction &MF, - std::vector<MachineInstr*> &CPEMIs); + void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - void JumpTableFunctionScan(MachineFunction &MF); - void InitialFunctionScan(MachineFunction &MF, - const std::vector<MachineInstr*> &CPEMIs); + unsigned getCPELogAlign(const MachineInstr *CPEMI); + void JumpTableFunctionScan(); + void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs); MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); - void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); + void AdjustBBOffsetsAfter(MachineBasicBlock *BB); bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); + bool HandleConstantPoolUser(unsigned CPUserIndex); void RemoveDeadCPEMI(MachineInstr *CPEMI); bool RemoveUnusedCPEntries(); bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned Disp, bool NegOk, bool DoDump = false); bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, - CPUser &U); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, - unsigned Disp, bool NegativeOK, bool IsSoImm = false); + CPUser &U, unsigned &Growth); bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br); - bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br); - bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br); + bool FixUpImmediateBr(ImmBranch &Br); + bool FixUpConditionalBr(ImmBranch &Br); + bool FixUpUnconditionalBr(ImmBranch &Br); bool UndoLRSpillRestore(); - bool OptimizeThumb2Instructions(MachineFunction &MF); - bool OptimizeThumb2Branches(MachineFunction &MF); - bool ReorderThumb2JumpTables(MachineFunction &MF); - bool OptimizeThumb2JumpTables(MachineFunction &MF); + bool OptimizeThumb2Instructions(); + bool OptimizeThumb2Branches(); + bool ReorderThumb2JumpTables(); + bool OptimizeThumb2JumpTables(); MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); + void ComputeBlockSize(MachineBasicBlock *MBB); unsigned GetOffsetOf(MachineInstr *MI) const; void dumpBBs(); - void verify(MachineFunction &MF); + void verify(); + + bool OffsetIsInRange(unsigned UserOffset, 
unsigned TrialOffset, + unsigned Disp, bool NegativeOK, bool IsSoImm = false); + bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + const CPUser &U) { + return OffsetIsInRange(UserOffset, TrialOffset, + U.MaxDisp, U.NegOk, U.IsSoImm); + } }; char ARMConstantIslands::ID = 0; } /// verify - check BBOffsets, BBSizes, alignment of islands -void ARMConstantIslands::verify(MachineFunction &MF) { - assert(BBOffsets.size() == BBSizes.size()); - for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) - assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); - if (!isThumb) - return; +void ARMConstantIslands::verify() { #ifndef NDEBUG - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; - if (!MBB->empty() && - MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { - unsigned MBBId = MBB->getNumber(); - assert(HasInlineAsm || - (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || - (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0)); - } + unsigned Align = MBB->getAlignment(); + unsigned MBBId = MBB->getNumber(); + assert(BBInfo[MBBId].Offset % (1u << Align) == 0); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; @@ -257,10 +346,16 @@ void ARMConstantIslands::verify(MachineFunction &MF) { /// print block size and offset information - debugging void ARMConstantIslands::dumpBBs() { - for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) { - DEBUG(errs() << "block " << J << " offset " << BBOffsets[J] - << " size " << BBSizes[J] << "\n"); - } + DEBUG({ + for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { + const BasicBlockInfo &BBI = BBInfo[J]; + dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + << " kb=" << unsigned(BBI.KnownBits) + << " ua=" << unsigned(BBI.Unalign) + << " pa=" << unsigned(BBI.PostAlign) + << format(" size=%#x\n", BBInfo[J].Size); + } + }); } /// createARMConstantIslandPass - returns an instance of the constpool @@ -269,34 +364,38 @@ FunctionPass *llvm::createARMConstantIslandPass() { return new ARMConstantIslands(); } -bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { - MachineConstantPool &MCP = *MF.getConstantPool(); +bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MCP = mf.getConstantPool(); - TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo(); - AFI = MF.getInfo<ARMFunctionInfo>(); - STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); + DEBUG(dbgs() << "***** ARMConstantIslands: " + << MCP->getConstants().size() << " CP entries, aligned to " + << MCP->getConstantPoolAlignment() << " bytes *****\n"); + + TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo(); + AFI = MF->getInfo<ARMFunctionInfo>(); + STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); isThumb2 = AFI->isThumb2Function(); HasFarJump = false; - HasInlineAsm = false; // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. - MF.RenumberBlocks(); + MF->RenumberBlocks(); // Try to reorder and otherwise adjust the block layout to make good use // of the TB[BH] instructions. 
bool MadeChange = false; if (isThumb2 && AdjustJumpTableBlocks) { - JumpTableFunctionScan(MF); - MadeChange |= ReorderThumb2JumpTables(MF); + JumpTableFunctionScan(); + MadeChange |= ReorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. T2JumpTables.clear(); // Blocks may have shifted around. Keep the numbering up to date. - MF.RenumberBlocks(); + MF->RenumberBlocks(); } // Thumb1 functions containing constant pools get 4-byte alignment. @@ -304,16 +403,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // ARM and Thumb2 functions need to be 4-byte aligned. if (!isThumb1) - MF.EnsureAlignment(2); // 2 = log2(4) + MF->EnsureAlignment(2); // 2 = log2(4) // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; - if (!MCP.isEmpty()) { - DoInitialPlacement(MF, CPEMIs); - if (isThumb1) - MF.EnsureAlignment(2); // 2 = log2(4) - } + if (!MCP->isEmpty()) + DoInitialPlacement(CPEMIs); /// The next UID to take is the first unused one. AFI->initPICLabelUId(CPEMIs.size()); @@ -321,7 +417,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(MF, CPEMIs); + InitialFunctionScan(CPEMIs); CPEMIs.clear(); DEBUG(dumpBBs()); @@ -333,9 +429,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // is no change. unsigned NoCPIters = 0, NoBRIters = 0; while (true) { + DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= HandleConstantPoolUser(MF, i); + CPChange |= HandleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); @@ -344,9 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // appear as "new water" for the next iteration of constant pool placement. NewWaterList.clear(); + DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= FixUpImmediateBr(MF, ImmBranches[i]); + BRChange |= FixUpImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) llvm_unreachable("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); @@ -358,10 +456,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Shrink 32-bit Thumb2 branch, load, and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) - MadeChange |= OptimizeThumb2Instructions(MF); + MadeChange |= OptimizeThumb2Instructions(); // After a while, this might be made debug-only, but it is not expensive. - verify(MF); + verify(); // If LR has been forced spilled and no far jump (i.e. BL) has been issued, // undo the spill / restore of LR if possible. @@ -376,10 +474,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { } } - DEBUG(errs() << '\n'; dumpBBs()); + DEBUG(dbgs() << '\n'; dumpBBs()); - BBSizes.clear(); - BBOffsets.clear(); + BBInfo.clear(); WaterList.clear(); CPUsers.clear(); CPEntries.clear(); @@ -392,37 +489,65 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { /// DoInitialPlacement - Perform the initial placement of the constant pool /// entries. 
To start with, we put them all at the end of the function. -void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, - std::vector<MachineInstr*> &CPEMIs) { +void +ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. - MachineBasicBlock *BB = MF.CreateMachineBasicBlock(); - MF.push_back(BB); + MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); + MF->push_back(BB); + + // MachineConstantPool measures alignment in bytes. We measure in log2(bytes). + unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + + // Mark the basic block as aligned as required by the const-pool. + // If AlignConstantIslands isn't set, use 4-byte alignment for everything. + BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + + // The function needs to be as aligned as the basic blocks. The linker may + // move functions around based on their alignment. + MF->EnsureAlignment(BB->getAlignment()); + + // Order the entries in BB by descending alignment. That ensures correct + // alignment of all entries as long as BB is sufficiently aligned. Keep + // track of the insertion point for each alignment. We are going to bucket + // sort the entries as they are created. + SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. - const std::vector<MachineConstantPoolEntry> &CPs = - MF.getConstantPool()->getConstants(); + const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const TargetData &TD = *MF.getTarget().getTargetData(); + const TargetData &TD = *MF->getTarget().getTargetData(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); - // Verify that all constant pool entries are a multiple of 4 bytes. If not, - // we would have to pad them out or something so that instructions stay - // aligned. - assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!"); + assert(Size >= 4 && "Too small constant pool entry"); + unsigned Align = CPs[i].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid alignment"); + // Verify that all constant pool entries are a multiple of their alignment. + // If not, we would have to pad them out so that instructions stay aligned. + assert((Size % Align) == 0 && "CP Entry not multiple of alignment!"); + + // Insert CONSTPOOL_ENTRY before entries with a smaller alignment. + unsigned LogAlign = Log2_32(Align); + MachineBasicBlock::iterator InsAt = InsPoint[LogAlign]; MachineInstr *CPEMI = - BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) + BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) .addImm(i).addConstantPoolIndex(i).addImm(Size); CPEMIs.push_back(CPEMI); + // Ensure that future entries with higher alignment get inserted before + // CPEMI. This is bucket sort with iterators. + for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + if (InsPoint[a] == InsAt) + InsPoint[a] = CPEMI; + + // Add a new CPEntry, but no corresponding CPUser yet.
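
The InsPoint bucket sort above keeps one insertion iterator per alignment so that entries end up ordered by descending alignment. A standalone sketch of the same idea, using a std::list of (index, log-align) pairs instead of MachineInstrs; the names and data here are illustrative, not LLVM API:

    #include <cassert>
    #include <list>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, unsigned> Entry; // (pool index, log2 align)
    typedef std::list<Entry>::iterator EntryIt;

    int main() {
      const unsigned MaxLogAlign = 3; // up to 8-byte aligned entries
      std::list<Entry> Island;
      // One insertion point per alignment, all starting at the island's end.
      std::vector<EntryIt> InsPoint(MaxLogAlign + 1, Island.end());

      unsigned LogAligns[] = {2, 3, 2, 3}; // entries arrive in pool order
      for (unsigned i = 0; i != 4; ++i) {
        unsigned LA = LogAligns[i];
        EntryIt InsAt = InsPoint[LA];
        EntryIt NewIt = Island.insert(InsAt, Entry(i, LA));
        // Future entries with higher alignment must land before this one.
        for (unsigned a = LA + 1; a <= MaxLogAlign; ++a)
          if (InsPoint[a] == InsAt)
            InsPoint[a] = NewIt;
      }

      // Sorted by descending alignment: 1, 3 (8-byte), then 0, 2 (4-byte).
      unsigned Expected[] = {1, 3, 0, 2};
      unsigned j = 0;
      for (EntryIt I = Island.begin(); I != Island.end(); ++I, ++j)
        assert(I->first == Expected[j]);
      return 0;
    }

Note that the update loop has to include the top bucket (a <= MaxLogAlign here); stopping one short would leave the insertion point for the most-aligned entries stuck at the end of the island, breaking the descending order.
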
std::vector<CPEntry> CPEs; CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); ++NumCPEs; - DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i - << "\n"); + DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n"); } + DEBUG(BB->dump()); } /// BBHasFallthrough - Return true if the specified basic block can fallthrough @@ -458,17 +583,33 @@ ARMConstantIslands::CPEntry return NULL; } +/// getCPELogAlign - Returns the required alignment of the constant pool entry +/// represented by CPEMI. Alignment is measured in log2(bytes) units. +unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { + assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); + + // Everything is 4-byte aligned unless AlignConstantIslands is set. + if (!AlignConstantIslands) + return 2; + + unsigned CPI = CPEMI->getOperand(1).getIndex(); + assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); + unsigned Align = MCP->getConstants()[CPI].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); + return Log2_32(Align); +} + /// JumpTableFunctionScan - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. -void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); +void ARMConstantIslands::JumpTableFunctionScan() { + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) - if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT) + if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT) T2JumpTables.push_back(I); } } @@ -476,23 +617,27 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { /// InitialFunctionScan - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. -void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, - const std::vector<MachineInstr*> &CPEMIs) { - // First thing, see if the function has any inline assembly in it. If so, - // we have to be conservative about alignment assumptions, as we don't - // know for sure the size of any instructions in the inline assembly. - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) - if (I->getOpcode() == ARM::INLINEASM) - HasInlineAsm = true; - } +void ARMConstantIslands:: +InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + ComputeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + AdjustBBOffsetsAfter(MF->begin()); // Now go back through the instructions and build up our data structures. 
- unsigned Offset = 0; - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -501,16 +646,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, if (!BBHasFallthrough(&MBB)) WaterList.push_back(&MBB); - unsigned MBBSize = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { if (I->isDebugValue()) continue; - // Add instruction size to MBBSize. - MBBSize += TII->GetInstSizeInBytes(I); int Opc = I->getOpcode(); - if (I->getDesc().isBranch()) { + if (I->isBranch()) { bool isCond = false; unsigned Bits = 0; unsigned Scale = 1; @@ -518,18 +660,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, switch (Opc) { default: continue; // Ignore other JT branches - case ARM::tBR_JTr: - // A Thumb1 table jump may involve padding; for the offsets to - // be right, functions containing these must be 4-byte aligned. - // tBR_JTr expands to a mov pc followed by .align 2 and then the jump - // table entries. So this code checks whether offset of tBR_JTr + 2 - // is aligned. That is held in Offset+MBBSize, which already has - // 2 added in for the size of the mov pc instruction. - MF.EnsureAlignment(2U); - if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) - // FIXME: Add a pseudo ALIGN instruction instead. - MBBSize += 2; // padding - continue; // Does not get an entry in ImmBranches case ARM::t2BR_JT: T2JumpTables.push_back(I); continue; // Does not get an entry in ImmBranches @@ -647,18 +777,30 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, break; } } + } +} - // In thumb mode, if this block is a constpool island, we may need padding - // so it's aligned on 4 byte boundary. - if (isThumb && - !MBB.empty() && - MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && - ((Offset%4) != 0 || HasInlineAsm)) - MBBSize += 2; - - BBSizes.push_back(MBBSize); - BBOffsets.push_back(Offset); - Offset += MBBSize; +/// ComputeBlockSize - Compute the size and some alignment information for MBB. +/// This function updates BBInfo directly. +void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + BBI.PostAlign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->GetInstSizeInBytes(I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = isThumb ? 1 : 2; + } + + // tBR_JTr contains a .align 2 directive. + if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { + BBI.PostAlign = 2; + MBB->getParent()->EnsureAlignment(2); } } @@ -671,14 +813,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { // The offset is composed of two things: the sum of the sizes of all MBB's // before this instruction's block, and the offset from the start of the block // it is in. - unsigned Offset = BBOffsets[MBB->getNumber()]; - - // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has - // alignment padding, and compensate if so. - if (isThumb && - MI->getOpcode() == ARM::CONSTPOOL_ENTRY && - (Offset%4 != 0 || HasInlineAsm)) - Offset += 2; + unsigned Offset = BBInfo[MBB->getNumber()].Offset; // Sum instructions before MI in MBB. 
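
Stepping back, ComputeBlockSize above boils each block down to the three numbers BasicBlockInfo tracks. A toy model of that reduction; Inst and its fields are illustrative stand-ins for MachineInstr and TII->GetInstSizeInBytes, not LLVM API:

    #include <cassert>
    #include <vector>

    struct Inst {
      unsigned Size;    // worst-case size in bytes
      bool IsInlineAsm; // size is only an upper bound
      bool IsThumbJT;   // stands in for ARM::tBR_JTr, ends with .align 2
    };

    struct BlockInfo {
      unsigned Size;
      unsigned Unalign;   // log2 of the unit the real size can differ by
      unsigned PostAlign; // log2 alignment guaranteed after the terminator
    };

    // Mirrors the shape of ComputeBlockSize: sum worst-case instruction
    // sizes, remember if inline asm makes the total inexact, and record the
    // .align a Thumb1 jump-table terminator emits.
    BlockInfo computeBlockSize(const std::vector<Inst> &MBB, bool isThumb) {
      BlockInfo BBI = {0, 0, 0};
      for (unsigned i = 0, e = MBB.size(); i != e; ++i) {
        BBI.Size += MBB[i].Size;
        if (MBB[i].IsInlineAsm)
          BBI.Unalign = isThumb ? 1 : 2; // 2-byte vs 4-byte instruction units
      }
      if (!MBB.empty() && MBB.back().IsThumbJT)
        BBI.PostAlign = 2; // end of block is 4-byte aligned
      return BBI;
    }

    int main() {
      std::vector<Inst> B;
      Inst I1 = {2, false, false}, Asm = {8, true, false}, JT = {2, false, true};
      B.push_back(I1); B.push_back(Asm); B.push_back(JT);
      BlockInfo BBI = computeBlockSize(B, /*isThumb=*/true);
      assert(BBI.Size == 12 && BBI.Unalign == 1 && BBI.PostAlign == 2);
      return 0;
    }
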
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) { @@ -702,12 +837,9 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { // Renumber the MBB's to keep them consecutive. NewBB->getParent()->RenumberBlocks(NewBB); - // Insert a size into BBSizes to align it properly with the (newly + // Insert an entry into BBInfo to align it properly with the (newly // renumbered) block numbers. - BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); - - // Likewise for BBOffsets. - BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); // Next, update WaterList. Specifically, we need to add NewMBB as having // available water after it. @@ -723,13 +855,12 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// account for this change and returns the newly created block. MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); - MachineFunction &MF = *OrigBB->getParent(); // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = - MF.CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); MachineFunction::iterator MBBI = OrigBB; ++MBBI; - MF.insert(MBBI, NewBB); + MF->insert(MBBI, NewBB); // Splice the instructions starting with MI over to NewBB. NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); @@ -747,16 +878,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. - while (!OrigBB->succ_empty()) { - MachineBasicBlock *Succ = *OrigBB->succ_begin(); - OrigBB->removeSuccessor(Succ); - NewBB->addSuccessor(Succ); - - // This pass should be run after register allocation, so there should be no - // PHI nodes to update. - assert((Succ->empty() || !Succ->begin()->isPHI()) - && "PHI nodes should be eliminated by now!"); - } + NewBB->transferSuccessors(OrigBB); // OrigBB branches to NewBB. OrigBB->addSuccessor(NewBB); @@ -764,14 +886,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // Update internal data structures to account for the newly inserted MBB. // This is almost the same as UpdateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. - MF.RenumberBlocks(NewBB); + MF->RenumberBlocks(NewBB); - // Insert a size into BBSizes to align it properly with the (newly + // Insert an entry into BBInfo to align it properly with the (newly // renumbered) block numbers. - BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); - - // Likewise for BBOffsets. - BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); // Next, update WaterList. Specifically, we need to add OrigMBB as having // available water after it (but not if it's already there, which happens @@ -787,54 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); - unsigned OrigBBI = OrigBB->getNumber(); - unsigned NewBBI = NewBB->getNumber(); - - int delta = isThumb1 ? 2 : 4; - // Figure out how large the OrigBB is. As the first half of the original // block, it cannot contain a tablejump. The size includes // the new jump we added. 
(It should be possible to do this without - recounting everything, but it's very confusing, and this is rarely - executed.) - unsigned OrigBBSize = 0; - for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end(); - I != E; ++I) - OrigBBSize += TII->GetInstSizeInBytes(I); - BBSizes[OrigBBI] = OrigBBSize; - - // ...and adjust BBOffsets for NewBB accordingly. - BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + ComputeBlockSize(OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. - unsigned NewBBSize = 0; - for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); - I != E; ++I) - NewBBSize += TII->GetInstSizeInBytes(I); - // Set the size of NewBB in BBSizes. It does not include any padding now. - BBSizes[NewBBI] = NewBBSize; - - MachineInstr* ThumbJTMI = prior(NewBB->end()); - if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { - // We've added another 2-byte instruction before this tablejump, which - // means we will always need padding if we didn't before, and vice versa. - - // The original offset of the jump instruction was: - unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta; - if (OrigOffset%4 == 0) { - // We had padding before and now we don't. No net change in code size. - delta = 0; - } else { - // We didn't have padding before and now we do. - BBSizes[NewBBI] += 2; - delta = 4; - } - } + ComputeBlockSize(NewBB); // All BBOffsets following these blocks must be modified. - if (delta) - AdjustBBOffsetsAfter(NewBB, delta); + AdjustBBOffsetsAfter(OrigBB); return NewBB; } @@ -882,19 +966,44 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, /// WaterIsInRange - Returns true if a CPE placed after the specified /// Water (a basic block) will be in range for the specific MI. - +/// +/// Compute how much the function will grow by inserting a CPE after Water. bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, - MachineBasicBlock* Water, CPUser &U) { - unsigned MaxDisp = U.MaxDisp; - unsigned CPEOffset = BBOffsets[Water->getNumber()] + - BBSizes[Water->getNumber()]; - - // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. - if (CPEOffset < UserOffset) - UserOffset += U.CPEMI->getOperand(2).getImm(); - - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm); + MachineBasicBlock* Water, CPUser &U, + unsigned &Growth) { + unsigned CPELogAlign = getCPELogAlign(U.CPEMI); + unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); + unsigned NextBlockOffset, NextBlockAlignment; + MachineFunction::const_iterator NextBlock = Water; + if (++NextBlock == MF->end()) { + NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); + NextBlockAlignment = 0; + } else { + NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; + NextBlockAlignment = NextBlock->getAlignment(); + } + unsigned Size = U.CPEMI->getOperand(2).getImm(); + unsigned CPEEnd = CPEOffset + Size; + + // The CPE may be able to hide in the alignment padding before the next + // block. It may also cause more padding to be required if it is more aligned + // than the next block. + if (CPEEnd > NextBlockOffset) { + Growth = CPEEnd - NextBlockOffset; + // Compute the padding that would go at the end of the CPE to align the next + // block. + Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); + + // If the CPE is to be inserted before the instruction, that will raise + // the offset of the instruction.
Also account for unknown alignment padding + // in blocks between CPE and the user. + if (CPEOffset < UserOffset) + UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); + } else + // CPE fits in existing padding. + Growth = 0; + + return OffsetIsInRange(UserOffset, CPEOffset, U); } /// CPEIsInRange - Returns true if the distance between specific MI and @@ -903,14 +1012,20 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { unsigned CPEOffset = GetOffsetOf(CPEMI); - assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE"); + assert(CPEOffset % 4 == 0 && "Misaligned CPE"); if (DoDump) { - DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm() - << " max delta=" << MaxDisp - << " insn address=" << UserOffset - << " CPE address=" << CPEOffset - << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI); + DEBUG({ + unsigned Block = MI->getParent()->getNumber(); + const BasicBlockInfo &BBI = BBInfo[Block]; + dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() + << " max delta=" << MaxDisp + << format(" insn address=%#x", UserOffset) + << " in BB#" << Block << ": " + << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI + << format("CPE address=%#x offset=%+d: ", CPEOffset, + int(CPEOffset-UserOffset)); + }); } return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); @@ -933,55 +1048,17 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { } #endif // NDEBUG -void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, - int delta) { - MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI); - for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs(); - i < e; ++i) { - BBOffsets[i] += delta; - // If some existing blocks have padding, adjust the padding as needed, a - // bit tricky. delta can be negative so don't use % on that. - if (!isThumb) - continue; - MachineBasicBlock *MBB = MBBI; - if (!MBB->empty() && !HasInlineAsm) { - // Constant pool entries require padding. - if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { - unsigned OldOffset = BBOffsets[i] - delta; - if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) { - // remove existing padding - BBSizes[i] -= 2; - delta -= 2; - } - } - // Thumb1 jump tables require padding. They should be at the end; - // following unconditional branches are removed by AnalyzeBranch. - // tBR_JTr expands to a mov pc followed by .align 2 and then the jump - // table entries. So this code checks whether offset of tBR_JTr - // is aligned; if it is, the offset of the jump table following the - // instruction will not be aligned, and we need padding. - MachineInstr *ThumbJTMI = prior(MBB->end()); - if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { - unsigned NewMIOffset = GetOffsetOf(ThumbJTMI); - unsigned OldMIOffset = NewMIOffset - delta; - if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) { - // remove existing padding - BBSizes[i] -= 2; - delta -= 2; - } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } - } - if (delta==0) - return; - } - MBBI = llvm::next(MBBI); +void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { + for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + // Get the offset and known bits at the end of the layout predecessor. 
+ // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; } } @@ -1016,7 +1093,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Check to see if the CPE is already in-range. if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { - DEBUG(errs() << "In range\n"); + DEBUG(dbgs() << "In range\n"); return 1; } @@ -1031,7 +1108,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) if (CPEs[i].CPEMI == NULL) continue; if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { - DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#" + DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; @@ -1079,10 +1156,9 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, if (WaterList.empty()) return false; - bool FoundWaterThatWouldPad = false; - water_iterator IPThatWouldPad; - for (water_iterator IP = prior(WaterList.end()), - B = WaterList.begin();; --IP) { + unsigned BestGrowth = ~0u; + for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; + --IP) { MachineBasicBlock* WaterBB = *IP; // Check if water is in range and is either at a lower address than the // current "high water mark" or a new water block that was created since @@ -1092,31 +1168,24 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, // should be relatively uncommon and when it does happen, we want to be // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. - if (WaterIsInRange(UserOffset, WaterBB, U) && + unsigned Growth; + if (WaterIsInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB))) { - unsigned WBBId = WaterBB->getNumber(); - if (isThumb && - (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { - // This is valid Water, but would introduce padding. Remember - // it in case we don't find any Water that doesn't do this. - if (!FoundWaterThatWouldPad) { - FoundWaterThatWouldPad = true; - IPThatWouldPad = IP; - } - } else { - WaterIter = IP; + NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + // This is the least amount of required padding seen so far. + BestGrowth = Growth; + WaterIter = IP; + DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + << " Growth=" << Growth << '\n'); + + // Keep looking unless it is perfect. 
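
Taken together, postOffset, postKnownBits, and the AdjustBBOffsetsAfter loop above propagate offsets and known bits down the layout in a single pass: each block starts where its layout predecessor worst-case ends, given the block's own alignment. A condensed self-contained model of that arithmetic (simplified types; the sample numbers are invented):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Helpers as in the pass (see the sketch after WorstCaseAlign above).
    static unsigned unknownPadding(unsigned LogAlign, unsigned KnownBits) {
      return KnownBits < LogAlign ? (1u << LogAlign) - (1u << KnownBits) : 0;
    }
    static unsigned worstCaseAlign(unsigned Offset, unsigned LogAlign,
                                   unsigned KnownBits) {
      Offset += unknownPadding(LogAlign, KnownBits);
      unsigned Align = 1u << LogAlign;
      return (Offset + Align - 1) & ~(Align - 1);
    }

    struct BBInfo {
      unsigned Offset, Size, KnownBits, Unalign, PostAlign;
      unsigned internalKnownBits() const { return Unalign ? Unalign : KnownBits; }
      unsigned postOffset(unsigned LogAlign) const {
        unsigned PO = Offset + Size;
        unsigned LA = std::max(PostAlign, LogAlign);
        return LA ? worstCaseAlign(PO, LA, internalKnownBits()) : PO;
      }
      unsigned postKnownBits(unsigned LogAlign) const {
        return std::max(std::max(PostAlign, LogAlign), internalKnownBits());
      }
    };

    // The propagation loop from AdjustBBOffsetsAfter.
    void adjustOffsets(std::vector<BBInfo> &Info,
                       const std::vector<unsigned> &BlockAlign) {
      for (unsigned i = 1, e = Info.size(); i != e; ++i) {
        Info[i].Offset = Info[i - 1].postOffset(BlockAlign[i]);
        Info[i].KnownBits = Info[i - 1].postKnownBits(BlockAlign[i]);
      }
    }

    int main() {
      // Block 0: 6 bytes of Thumb code, low 2 offset bits known (function
      // aligned to 4). Block 1: an 8-byte aligned constant island.
      BBInfo B0 = {0, 6, 2, 0, 0}, B1 = {0, 16, 0, 0, 0};
      std::vector<BBInfo> Info; Info.push_back(B0); Info.push_back(B1);
      std::vector<unsigned> Align; Align.push_back(2); Align.push_back(3);
      adjustOffsets(Info, Align);
      // postOffset = worstCaseAlign(6, 3, 2) = roundUp(6 + 4, 8) = 16.
      assert(Info[1].Offset == 16 && Info[1].KnownBits == 3);
      return 0;
    }
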
+ if (BestGrowth == 0) return true; - } } if (IP == B) break; } - if (FoundWaterThatWouldPad) { - WaterIter = IPThatWouldPad; - return true; - } - return false; + return BestGrowth != ~0u; } /// CreateNewWater - No existing WaterList entry will work for @@ -1132,114 +1201,143 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; + unsigned CPELogAlign = getCPELogAlign(CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); - unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] + - BBSizes[UserMBB->getNumber()]; - assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]); + const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; // If the block does not end in an unconditional branch already, and if the // end of the block is within range, make new water there. (The addition // below is for the unconditional branch we will be adding: 4 bytes on ARM + // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for // inside OffsetIsInRange. - if (BBHasFallthrough(UserMBB) && - OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4), - U.MaxDisp, U.NegOk, U.IsSoImm)) { - DEBUG(errs() << "Split at end of block\n"); - if (&UserMBB->back() == UserMI) - assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); - NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); - // Add an unconditional branch from UserMBB to fallthrough block. - // Record it for branch lengthening; this new branch will not get out of - // range, but if the preceding conditional branch is out of range, the - // targets will be exchanged, and the altered branch may be out of - // range, so the machinery has to know about it. - int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; - if (!isThumb) - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); - else - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) - .addImm(ARMCC::AL).addReg(0); - unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); - ImmBranches.push_back(ImmBranch(&UserMBB->back(), - MaxDisp, false, UncondBr)); - int delta = isThumb1 ? 2 : 4; - BBSizes[UserMBB->getNumber()] += delta; - AdjustBBOffsetsAfter(UserMBB, delta); - } else { - // What a big block. Find a place within the block to split it. - // This is a little tricky on Thumb1 since instructions are 2 bytes - // and constant pool entries are 4 bytes: if instruction I references - // island CPE, and instruction I+1 references CPE', it will - // not work well to put CPE as far forward as possible, since then - // CPE' cannot immediately follow it (that location is 2 bytes - // farther away from I+1 than CPE was from I) and we'd need to create - // a new island. So, we make a first guess, then walk through the - // instructions between the one currently being looked at and the - // possible insertion point, and make sure any other instructions - // that reference CPEs will be able to use the same island area; - // if not, we back up the insertion point. - - // The 4 in the following is for the unconditional branch we'll be - // inserting (allows for long branch on Thumb1). Alignment of the - // island is handled inside OffsetIsInRange. - unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4; - // This could point off the end of the block if we've already got - // constant pool entries following this block; only the last one is - // in the water list. 
Back past any possible branches (allow for a - // conditional and a maximally long unconditional). - if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1]) - BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] - - (isThumb1 ? 6 : 8); - unsigned EndInsertOffset = BaseInsertOffset + - CPEMI->getOperand(2).getImm(); - MachineBasicBlock::iterator MI = UserMI; - ++MI; - unsigned CPUIndex = CPUserIndex+1; - unsigned NumCPUsers = CPUsers.size(); - MachineInstr *LastIT = 0; - for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); - Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { - if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { - CPUser &U = CPUsers[CPUIndex]; - if (!OffsetIsInRange(Offset, EndInsertOffset, - U.MaxDisp, U.NegOk, U.IsSoImm)) { - BaseInsertOffset -= (isThumb1 ? 2 : 4); - EndInsertOffset -= (isThumb1 ? 2 : 4); - } - // This is overly conservative, as we don't account for CPEMIs - // being reused within the block, but it doesn't matter much. - EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); - CPUIndex++; - } + if (BBHasFallthrough(UserMBB)) { + // Size of branch to insert. + unsigned Delta = isThumb1 ? 2 : 4; + // End of UserBlock after adding a branch. + unsigned UserBlockEnd = UserBBI.postOffset() + Delta; + // Compute the offset where the CPE will begin. + unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, + UserBBI.postKnownBits()); + + if (OffsetIsInRange(UserOffset, CPEOffset, U)) { + DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() + << format(", expected CPE offset %#x\n", CPEOffset)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. Record + // it for branch lengthening; this new branch will not get out of range, + // but if the preceding conditional branch is out of range, the targets + // will be exchanged, and the altered branch may be out of range, so the + // machinery has to know about it. + int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; + if (!isThumb) + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + else + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) + .addImm(ARMCC::AL).addReg(0); + unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); + ImmBranches.push_back(ImmBranch(&UserMBB->back(), + MaxDisp, false, UncondBr)); + BBInfo[UserMBB->getNumber()].Size += Delta; + AdjustBBOffsetsAfter(UserMBB); + return; + } + } - // Remember the last IT instruction. - if (MI->getOpcode() == ARM::t2IT) - LastIT = MI; + // What a big block. Find a place within the block to split it. This is a + // little tricky on Thumb1 since instructions are 2 bytes and constant pool + // entries are 4 bytes: if instruction I references island CPE, and + // instruction I+1 references CPE', it will not work well to put CPE as far + // forward as possible, since then CPE' cannot immediately follow it (that + // location is 2 bytes farther away from I+1 than CPE was from I) and we'd + // need to create a new island. So, we make a first guess, then walk through + // the instructions between the one currently being looked at and the + // possible insertion point, and make sure any other instructions that + // reference CPEs will be able to use the same island area; if not, we back + // up the insertion point. + + // Try to split the block so it's fully aligned. 
Compute the latest split + point where we can add a 4-byte branch instruction, and then + WorstCaseAlign to LogAlign. + unsigned LogAlign = MF->getAlignment(); + assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + unsigned KnownBits = UserBBI.internalKnownBits(); + unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned BaseInsertOffset = UserOffset + U.MaxDisp; + DEBUG(dbgs() << format("Split in middle of big block before %#x", + BaseInsertOffset)); + + // Account for alignment and unknown padding. + BaseInsertOffset &= ~((1u << LogAlign) - 1); + BaseInsertOffset -= UPad; + + // The 4 in the following is for the unconditional branch we'll be inserting + // (allows for long branch on Thumb1). Alignment of the island is handled + // inside OffsetIsInRange. + BaseInsertOffset -= 4; + + DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) + << " la=" << LogAlign + << " kb=" << KnownBits + << " up=" << UPad << '\n'); + + // This could point off the end of the block if we've already got constant + // pool entries following this block; only the last one is in the water list. + // Back past any possible branches (allow for a conditional and a maximally + // long unconditional). + if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset) + BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset - + (isThumb1 ? 6 : 8); + unsigned EndInsertOffset = + WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) + + CPEMI->getOperand(2).getImm(); + MachineBasicBlock::iterator MI = UserMI; + ++MI; + unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + MachineInstr *LastIT = 0; + for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + Offset < BaseInsertOffset; + Offset += TII->GetInstSizeInBytes(MI), + MI = llvm::next(MI)) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { + CPUser &U = CPUsers[CPUIndex]; + if (!OffsetIsInRange(Offset, EndInsertOffset, U)) { + // Shift insertion point by one unit of alignment so it is within reach. + BaseInsertOffset -= 1u << LogAlign; + EndInsertOffset -= 1u << LogAlign; + } + // This is overly conservative, as we don't account for CPEMIs being + // reused within the block, but it doesn't matter much. Also assume CPEs + // are added in order with alignment padding. We may eventually be able + // to pack the aligned CPEs better. + EndInsertOffset = RoundUpToAlignment(EndInsertOffset, + 1u << getCPELogAlign(U.CPEMI)) + + U.CPEMI->getOperand(2).getImm(); + CPUIndex++; } - DEBUG(errs() << "Split in middle of big block\n"); - --MI; + // Remember the last IT instruction. + if (MI->getOpcode() == ARM::t2IT) + LastIT = MI; + } - // Avoid splitting an IT block. - if (LastIT) { - unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); - if (CC != ARMCC::AL) - MI = LastIT; - } - NewMBB = SplitBlockBeforeInstr(MI); + --MI; + + // Avoid splitting an IT block. + if (LastIT) { + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + if (CC != ARMCC::AL) + MI = LastIT; } + NewMBB = SplitBlockBeforeInstr(MI); } /// HandleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, - unsigned CPUserIndex) { +bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; @@ -1260,11 +1358,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned ID = AFI->createPICLabelUId(); // Look for water where we can place this CPE. - MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; if (LookForWater(U, UserOffset, IP)) { - DEBUG(errs() << "found water in range\n"); + DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; // If the original WaterList entry was "new water" on this iteration, @@ -1279,7 +1377,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, } else { // No water found. - DEBUG(errs() << "No water found\n"); + DEBUG(dbgs() << "No water found\n"); CreateNewWater(CPUserIndex, UserOffset, NewMBB); // SplitBlockBeforeInstr adds to WaterList, which is important when it is @@ -1304,7 +1402,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, WaterList.erase(IP); // Okay, we know we can put an island before NewMBB now, do it! - MF.insert(NewMBB, NewIsland); + MF->insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. UpdateForInsertedWaterBlock(NewIsland); @@ -1320,13 +1418,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; - BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; - // Compensate for .align 2 in thumb mode. - if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm)) - Size += 2; + // Mark the basic block as aligned as required by the const-pool entry. + NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + // Increase the size of the island block to account for the new entry. - BBSizes[NewIsland->getNumber()] += Size; - AdjustBBOffsetsAfter(NewIsland, Size); + BBInfo[NewIsland->getNumber()].Size += Size; + AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1335,8 +1432,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, break; } - DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI - << '\t' << *UserMI); + DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI + << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); return true; } @@ -1347,19 +1444,18 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { MachineBasicBlock *CPEBB = CPEMI->getParent(); unsigned Size = CPEMI->getOperand(2).getImm(); CPEMI->eraseFromParent(); - BBSizes[CPEBB->getNumber()] -= Size; + BBInfo[CPEBB->getNumber()].Size -= Size; // All succeeding offsets have the current size value added in, fix this. if (CPEBB->empty()) { - // In thumb1 mode, the size of island may be padded by two to compensate for - // the alignment requirement. Then it will now be 2 when the block is - // empty, so fix this. - // All succeeding offsets have the current size value added in, fix this. 
- if (BBSizes[CPEBB->getNumber()] != 0) { - Size += BBSizes[CPEBB->getNumber()]; - BBSizes[CPEBB->getNumber()] = 0; - } - } - AdjustBBOffsetsAfter(CPEBB, -Size); + BBInfo[CPEBB->getNumber()].Size = 0; + + // This block no longer needs to be aligned. <rdar://problem/10534709>. + CPEBB->setAlignment(0); + } else + // Entries are sorted by descending alignment, so realign from the front. + CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); + + AdjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if // this BB's predecessor jumps directly to this BB's successor. This // shouldn't happen currently. @@ -1390,9 +1486,9 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) { unsigned PCAdj = isThumb ? 4 : 8; unsigned BrOffset = GetOffsetOf(MI) + PCAdj; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; - DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber() + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " max delta=" << MaxDisp << " from " << GetOffsetOf(MI) << " to " << DestOffset @@ -1411,7 +1507,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, /// FixUpImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { +bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1420,8 +1516,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { return false; if (!Br.isCond) - return FixUpUnconditionalBr(MF, Br); - return FixUpConditionalBr(MF, Br); + return FixUpUnconditionalBr(Br); + return FixUpConditionalBr(Br); } /// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is @@ -1429,7 +1525,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); if (!isThumb1) @@ -1438,12 +1534,12 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; MI->setDesc(TII->get(ARM::tBfar)); - BBSizes[MBB->getNumber()] += 2; - AdjustBBOffsetsAfter(MBB, 2); + BBInfo[MBB->getNumber()].Size += 2; + AdjustBBOffsetsAfter(MBB); HasFarJump = true; ++NumUBrFixed; - DEBUG(errs() << " Changed B to long jump " << *MI); + DEBUG(dbgs() << " Changed B to long jump " << *MI); return true; } @@ -1452,7 +1548,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. 
bool -ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1487,7 +1583,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { - DEBUG(errs() << " Invert Bcc condition and swap its destination with " + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); MI->getOperand(0).setMBB(NewDest); @@ -1502,15 +1598,13 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); - BBSizes[MBB->getNumber()] -= delta; - MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB)); - AdjustBBOffsetsAfter(SplitBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); - // BBOffsets[SplitBB] is wrong temporarily, fixed below + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); - DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber() + DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" << NextBB->getNumber() << "\n"); @@ -1519,23 +1613,20 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); if (isThumb) BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) .addImm(ARMCC::AL).addReg(0); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); // Remove the old conditional branch. It may or may not still be in MBB. - BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI); + BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); MI->eraseFromParent(); - - // The net size change is an addition of one unconditional branch. - int delta = TII->GetInstSizeInBytes(&MBB->back()); - AdjustBBOffsetsAfter(MBB, delta); + AdjustBBOffsetsAfter(MBB); return true; } @@ -1561,7 +1652,7 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2Instructions() { bool MadeChange = false; // Shrink ADR and LDR from constantpool. 
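
FixUpConditionalBr above, when it cannot simply swap destinations, rewrites an out-of-range Bcc into an inverted conditional branch over an unconditional one. A schematic model of just that rewrite; the Branch struct and small condition set are illustrative, not the pass's actual MachineInstr surgery:

    #include <cassert>
    #include <string>
    #include <vector>

    // blt FarBB  (out of range)   =>   bge NextBB
    //                                  b   FarBB
    enum Cond { EQ, NE, GE, LT, GT, LE };

    Cond invert(Cond CC) {
      switch (CC) {
      case EQ: return NE; case NE: return EQ;
      case GE: return LT; case LT: return GE;
      case GT: return LE; case LE: return GT;
      }
      return EQ; // unreachable
    }

    struct Branch { bool Conditional; Cond CC; std::string Dest; };

    std::vector<Branch> widenCondBranch(Cond CC, const std::string &FarBB,
                                        const std::string &NextBB) {
      std::vector<Branch> Out;
      Branch Inverted = {true, invert(CC), NextBB};
      Branch Uncond = {false, EQ, FarBB}; // CC unused when unconditional
      Out.push_back(Inverted);
      Out.push_back(Uncond);
      return Out;
    }

    int main() {
      std::vector<Branch> Seq = widenCondBranch(LT, "FarBB", "NextBB");
      assert(Seq[0].CC == GE && Seq[0].Dest == "NextBB");
      assert(!Seq[1].Conditional && Seq[1].Dest == "FarBB");
      return 0;
    }

As in the hunk above, the real pass also records the new unconditional branch in ImmBranches, since it has a much larger range but may itself need lengthening on a later iteration.
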
@@ -1598,19 +1689,19 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { U.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = U.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumT2CPShrunk; MadeChange = true; } } - MadeChange |= OptimizeThumb2Branches(MF); - MadeChange |= OptimizeThumb2JumpTables(MF); + MadeChange |= OptimizeThumb2Branches(); + MadeChange |= OptimizeThumb2JumpTables(); return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2Branches() { bool MadeChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { @@ -1639,8 +1730,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { Br.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = Br.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; MadeChange = true; } @@ -1663,7 +1754,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; if (CmpMI != Br.MI->getParent()->begin()) { @@ -1681,8 +1772,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { CmpMI->eraseFromParent(); Br.MI->eraseFromParent(); Br.MI = NewBR; - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + AdjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; } @@ -1696,12 +1787,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { /// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. -bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::OptimizeThumb2JumpTables() { bool MadeChange = false; // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1709,7 +1800,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1720,7 +1811,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { MachineBasicBlock *MBB = JTBBs[j]; - unsigned DstOffset = BBOffsets[MBB->getNumber()]; + unsigned DstOffset = BBInfo[MBB->getNumber()].Offset; // Negative offset is not ok. 
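
The TB[BH] suitability scan above only tolerates forward targets, with byte tables reaching ((1<<8)-1)*2 = 510 bytes past the table. A rough model of that classification follows; the halfword bound ((1<<16)-1)*2 is not shown in this hunk and is assumed here by analogy:

    #include <cassert>
    #include <vector>

    // tbb stores byte offsets scaled by 2, tbh halfword offsets scaled by 2,
    // and both only branch forward from the jump table.
    struct TBKind { bool ByteOk, HalfWordOk; };

    TBKind classifyJumpTable(unsigned JTOffset,
                             const std::vector<unsigned> &DestOffsets) {
      TBKind K = {true, true};
      for (unsigned j = 0, e = DestOffsets.size(); j != e; ++j) {
        unsigned DstOffset = DestOffsets[j];
        // Negative (backward) offsets are not ok for either form. The pass
        // gets this for free from unsigned wraparound; we make it explicit.
        if (DstOffset < JTOffset) { K.ByteOk = K.HalfWordOk = false; break; }
        if ((DstOffset - JTOffset) > ((1u << 8) - 1) * 2)
          K.ByteOk = false;
        if ((DstOffset - JTOffset) > ((1u << 16) - 1) * 2) // assumed bound
          K.HalfWordOk = false;
      }
      return K;
    }

    int main() {
      std::vector<unsigned> Dests;
      Dests.push_back(120); Dests.push_back(700);
      TBKind K = classifyJumpTable(100, Dests);
      assert(!K.ByteOk && K.HalfWordOk); // 600 > 510, well within tbh range
      return 0;
    }
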
FIXME: We should change BB layout to make // sure all the branches are forward. if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) @@ -1808,8 +1899,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MI->eraseFromParent(); int delta = OrigSize - NewSize; - BBSizes[MBB->getNumber()] -= delta; - AdjustBBOffsetsAfter(MBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; + AdjustBBOffsetsAfter(MBB); ++NumTBs; MadeChange = true; @@ -1821,10 +1912,10 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { /// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that /// jump tables always branch forwards, since that's what tbb and tbh need. -bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::ReorderThumb2JumpTables() { bool MadeChange = false; - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1832,7 +1923,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1864,8 +1955,6 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineBasicBlock *ARMConstantIslands:: AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { - MachineFunction &MF = *BB->getParent(); - // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple @@ -1882,22 +1971,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) // If the block ends in an unconditional branch, move it. The prior block // has to have an analyzable terminator for us to move this one. Be paranoid // and make sure we're not trying to move the entry block of the function. - if (!B && Cond.empty() && BB != MF.begin() && + if (!B && Cond.empty() && BB != MF->begin() && !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { BB->moveAfter(JTBB); OldPrior->updateTerminator(); BB->updateTerminator(); // Update numbering to account for the block being moved. - MF.RenumberBlocks(); + MF->RenumberBlocks(); ++NumJTMoved; return NULL; } // Create a new MBB for the code after the jump BB. MachineBasicBlock *NewBB = - MF.CreateMachineBasicBlock(JTBB->getBasicBlock()); + MF->CreateMachineBasicBlock(JTBB->getBasicBlock()); MachineFunction::iterator MBBI = JTBB; ++MBBI; - MF.insert(MBBI, NewBB); + MF->insert(MBBI, NewBB); // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't @@ -1907,7 +1996,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) .addImm(ARMCC::AL).addReg(0); // Update internal data structures to account for the newly inserted MBB. - MF.RenumberBlocks(NewBB); + MF->RenumberBlocks(NewBB); // Update the CFG. 
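// Rewiring the CFG to match the inserted trampoline: NewBB ends in an
// unconditional branch to the original target, so BB must be recorded as a
// successor below (JTBB's old edge to BB is presumably retargeted at NewBB
// as well, though that part lies outside this hunk).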
NewBB->addSuccessor(BB); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index fc464ea..01d772d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -61,7 +61,7 @@ namespace { void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool IsExt, unsigned NumRegs); + unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); }; @@ -129,12 +129,15 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,true}, -{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true}, +{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false}, +{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false}, +{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false}, +{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, +{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false}, { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, @@ -177,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, { ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, { ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false}, { ARM::VLD2q16Pseudo, 
ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, -{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, -{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, -{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, @@ -267,10 +276,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, -{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true}, -{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true}, +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, { ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, { ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, @@ -296,19 +307,25 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true}, -{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, - -{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true}, -{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, -{ 
ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true}, -{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, @@ -620,7 +637,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ /// register operands to real instructions with D register operands. void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool IsExt, unsigned NumRegs) { + unsigned Opc, bool IsExt) { MachineInstr &MI = *MBBI; MachineBasicBlock &MBB = *MI.getParent(); @@ -636,11 +653,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0).addReg(D1); - if (NumRegs > 2) - MIB.addReg(D2); - if (NumRegs > 3) - MIB.addReg(D3); + MIB.addReg(D0); // Copy the other source register operand. 
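// (Above, only the first D register is added and the NumRegs parameter is
// gone, presumably because the real VTBL/VTBX instructions now take a
// register-list operand whose remaining D registers are implied by the
// first register of the list.)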
MIB.addOperand(MI.getOperand(OpIdx++)); @@ -1090,12 +1103,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8Pseudo_UPD: - case ARM::VLD2d16Pseudo_UPD: - case ARM::VLD2d32Pseudo_UPD: - case ARM::VLD2q8Pseudo_UPD: - case ARM::VLD2q16Pseudo_UPD: - case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD2d8PseudoWB_fixed: + case ARM::VLD2d16PseudoWB_fixed: + case ARM::VLD2d32PseudoWB_fixed: + case ARM::VLD2q8PseudoWB_fixed: + case ARM::VLD2q16PseudoWB_fixed: + case ARM::VLD2q32PseudoWB_fixed: + case ARM::VLD2d8PseudoWB_register: + case ARM::VLD2d16PseudoWB_register: + case ARM::VLD2d32PseudoWB_register: + case ARM::VLD2q8PseudoWB_register: + case ARM::VLD2q16PseudoWB_register: + case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: @@ -1131,9 +1150,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD1DUPq8Pseudo: case ARM::VLD1DUPq16Pseudo: case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8Pseudo_UPD: - case ARM::VLD1DUPq16Pseudo_UPD: - case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD1DUPq8PseudoWB_fixed: + case ARM::VLD1DUPq16PseudoWB_fixed: + case ARM::VLD1DUPq32PseudoWB_fixed: + case ARM::VLD1DUPq8PseudoWB_register: + case ARM::VLD1DUPq16PseudoWB_register: + case ARM::VLD1DUPq32PseudoWB_register: case ARM::VLD2DUPd8Pseudo: case ARM::VLD2DUPd16Pseudo: case ARM::VLD2DUPd32Pseudo: @@ -1173,12 +1195,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8Pseudo_UPD: - case ARM::VST2d16Pseudo_UPD: - case ARM::VST2d32Pseudo_UPD: - case ARM::VST2q8Pseudo_UPD: - case ARM::VST2q16Pseudo_UPD: - case ARM::VST2q32Pseudo_UPD: + case ARM::VST2d8PseudoWB_fixed: + case ARM::VST2d16PseudoWB_fixed: + case ARM::VST2d32PseudoWB_fixed: + case ARM::VST2q8PseudoWB_fixed: + case ARM::VST2q16PseudoWB_fixed: + case ARM::VST2q32PseudoWB_fixed: + case ARM::VST2d8PseudoWB_register: + case ARM::VST2d16PseudoWB_register: + case ARM::VST2d32PseudoWB_register: + case ARM::VST2q8PseudoWB_register: + case ARM::VST2q16PseudoWB_register: + case ARM::VST2q32PseudoWB_register: case ARM::VST3d8Pseudo: case ARM::VST3d16Pseudo: case ARM::VST3d32Pseudo: @@ -1186,7 +1214,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST3d8Pseudo_UPD: case ARM::VST3d16Pseudo_UPD: case ARM::VST3d32Pseudo_UPD: - case ARM::VST1d64TPseudo_UPD: + case ARM::VST1d64TPseudoWB_fixed: + case ARM::VST1d64TPseudoWB_register: case ARM::VST3q8Pseudo_UPD: case ARM::VST3q16Pseudo_UPD: case ARM::VST3q32Pseudo_UPD: @@ -1203,7 +1232,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST4d8Pseudo_UPD: case ARM::VST4d16Pseudo_UPD: case ARM::VST4d32Pseudo_UPD: - case ARM::VST1d64QPseudo_UPD: + case ARM::VST1d64QPseudoWB_fixed: + case ARM::VST1d64QPseudoWB_register: case ARM::VST4q8Pseudo_UPD: case ARM::VST4q16Pseudo_UPD: case ARM::VST4q32Pseudo_UPD: @@ -1291,12 +1321,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; - case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; - case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; - case ARM::VTBX4Pseudo: 
ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; } return false; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 9bae422..a98dfc3 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -178,10 +178,12 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt, - bool allocReg); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + unsigned Alignment = 0, bool isZExt = true, + bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); + bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); @@ -227,8 +229,7 @@ class ARMFastISel : public FastISel { // we don't care about implicit defs here, just places we'll need to add a // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.hasOptionalDef()) + if (!MI->hasOptionalDef()) return false; // Look to see if our OptionalDef is defining CPSR or CCR. @@ -702,7 +703,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(SI->second) .addImm(0)); @@ -898,7 +899,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { ARM::GPRRegisterClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(Addr.Base.FI) .addImm(0)); @@ -937,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addFrameIndex(FI); - // ARM halfword load/stores and signed byte loads need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional + // operand. if (useAM3) { signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); @@ -950,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - // ARM halfword load/stores and signed byte loads need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional + // operand. if (useAM3) { signed Imm = (Addr.Offset < 0) ? 
(0x100 | -Addr.Offset) : Addr.Offset; MIB.addReg(0); @@ -963,10 +966,11 @@ } bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, - bool isZExt = true, bool allocReg = true) { + unsigned Alignment, bool isZExt, bool allocReg) { assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; bool useAM3 = false; + bool needVMOV = false; TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. @@ -1012,10 +1016,25 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, RC = ARM::GPRRegisterClass; break; case MVT::f32: - Opc = ARM::VLDRS; - RC = TLI.getRegClassFor(VT); + if (!Subtarget->hasVFP2()) return false; + // Unaligned loads need special handling. Floats require word-alignment. + if (Alignment && Alignment < 4) { + needVMOV = true; + VT = MVT::i32; + Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; + RC = ARM::GPRRegisterClass; + } else { + Opc = ARM::VLDRS; + RC = TLI.getRegClassFor(VT); + } break; case MVT::f64: + if (!Subtarget->hasVFP2()) return false; + // FIXME: Unaligned loads need special handling. Doublewords require + // word-alignment. + if (Alignment && Alignment < 4) + return false; + Opc = ARM::VLDRD; RC = TLI.getRegClassFor(VT); break; @@ -1030,6 +1049,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); + + // If we had an unaligned load of a float, we've converted it to a regular + // load. Now we must move from the GPR to the FP register. + if (needVMOV) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVSR), MoveReg) + .addReg(ResultReg)); + ResultReg = MoveReg; + } return true; } @@ -1048,12 +1077,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; unsigned ResultReg; - if (!ARMEmitLoad(VT, ResultReg, Addr)) return false; + if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) + return false; UpdateValueMap(I, ResultReg); return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { +bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment) { unsigned StrOpc; bool useAM3 = false; switch (VT.getSimpleVT().SimpleTy) { @@ -1101,10 +1132,26 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; - StrOpc = ARM::VSTRS; + // Unaligned stores need special handling. Floats require word-alignment. + if (Alignment && Alignment < 4) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRS), MoveReg) + .addReg(SrcReg)); + SrcReg = MoveReg; + VT = MVT::i32; + StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; + } else { + StrOpc = ARM::VSTRS; + } break; case MVT::f64: if (!Subtarget->hasVFP2()) return false; + // FIXME: Unaligned stores need special handling. Doublewords require + // word-alignment.
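// (For f32, the change above sidesteps the alignment restriction by doing
// the memory access in the integer unit plus a VMOVSR/VMOVRS transfer; an
// unaligned f64 would presumably need two such transfers and a VMOVDRR to
// assemble the double, hence the conservative bail-out that follows.)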
+ if (Alignment && Alignment < 4) + return false; + StrOpc = ARM::VSTRD; break; } @@ -1141,7 +1188,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(1), Addr)) return false; - if (!ARMEmitStore(VT, SrcReg, Addr)) return false; + if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment())) + return false; return true; } @@ -1360,7 +1408,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, unsigned SrcReg1 = getRegForValue(Src1Value); if (SrcReg1 == 0) return false; - unsigned SrcReg2; + unsigned SrcReg2 = 0; if (!UseImm) { SrcReg2 = getRegForValue(Src2Value); if (SrcReg2 == 0) return false; @@ -1577,7 +1625,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { (ARM_AM::getSOImmVal(Imm) != -1); } - unsigned Op2Reg; + unsigned Op2Reg = 0; if (!UseImm) { Op2Reg = getRegForValue(I->getOperand(2)); if (Op2Reg == 0) return false; @@ -1716,7 +1764,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard) + TM.Options.FloatABIType == FloatABI::Hard) return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); else return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); @@ -1765,21 +1813,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::SExt: { - EVT DestVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); assert (ResultReg != 0 && "Failed to emit a sext"); Arg = ResultReg; + ArgVT = DestVT; break; } case CCValAssign::AExt: // Intentional fall-through. Handle AExt and ZExt. case CCValAssign::ZExt: { - EVT DestVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); assert (ResultReg != 0 && "Failed to emit a zext"); Arg = ResultReg; + ArgVT = DestVT; break; } case CCValAssign::BCvt: { @@ -2456,7 +2506,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; unsigned ResultReg = MI->getOperand(0).getReg(); - if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false)) + if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) return false; MI->eraseFromParent(); return true; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2d1de6f..06944b1 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -37,7 +37,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Always eliminate non-leaf frame pointers.
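// A commit-wide migration shows up in this and the following hunks: flags
// that used to be free-standing globals (DisableFramePointerElim,
// UseSoftFloat, UnsafeFPMath, FloatABIType) now live on TargetOptions and
// are reached through the TargetMachine, as in
// MF.getTarget().Options.DisableFramePointerElim(MF).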
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || + return ((MF.getTarget().Options.DisableFramePointerElim(MF) && + MFI->hasCalls()) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -309,8 +310,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getDesc().isReturn() && - "Can only insert epilog into returning blocks"); + assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 787f6a2..a5fd15b 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *MI = SU->getInstr(); if (!MI->isDebugValue()) { - if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1]) - return Hazard; - // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following // a VMLA / VMLS will cause 4 cycle stall. const MCInstrDesc &MCID = MI->getDesc(); @@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *DefMI = LastMI; const MCInstrDesc &LastMCID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. - if (!LastMCID.isBarrier() && + if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. - !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) && + !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { @@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { void ARMHazardRecognizer::Reset() { LastMI = 0; FpMLxStalls = 0; - ITBlockSize = 0; ScoreboardHazardRecognizer::Reset(); } void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { MachineInstr *MI = SU->getInstr(); - unsigned Opcode = MI->getOpcode(); - if (ITBlockSize) { - --ITBlockSize; - } else if (Opcode == ARM::t2IT) { - unsigned Mask = MI->getOperand(1).getImm(); - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - ITBlockSize = 4 - NumTZ; - MachineBasicBlock::iterator I = MI; - for (unsigned i = 0; i < ITBlockSize; ++i) { - // Advance to the next instruction, skipping any dbg_value instructions. 
- do { - ++I; - } while (I->isDebugValue()); - ITBlockMIs[ITBlockSize-1-i] = &*I; - } - } - if (!MI->isDebugValue()) { LastMI = MI; FpMLxStalls = 0; diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index 2bc218d..98bfc4c 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -23,6 +23,10 @@ class ARMBaseRegisterInfo; class ARMSubtarget; class MachineInstr; +/// ARMHazardRecognizer handles special constraints that are not expressed in +/// the scheduling itinerary. This is only used during postRA scheduling. The +/// ARM preRA scheduler uses an unspecialized instance of the +/// ScoreboardHazardRecognizer. class ARMHazardRecognizer : public ScoreboardHazardRecognizer { const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; @@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer { MachineInstr *LastMI; unsigned FpMLxStalls; - unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. - MachineInstr *ITBlockMIs[4]; public: ARMHazardRecognizer(const InstrItineraryData *ItinData, @@ -40,7 +42,7 @@ public: const ARMSubtarget &sti, const ScheduleDAG *DAG) : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), - TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} + TRI(tri), STI(sti), LastMI(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index bc8588f..7473141 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1579,6 +1579,22 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register; case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register; case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register; + case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; + case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; + + case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register; + case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register; + case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register; + case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; + case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; + case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; + + case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register; + case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register; + case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register; + case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; + case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; + case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } @@ -1646,13 +1662,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0 + // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. 
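// (Background on the _fixed/_register split used throughout this change:
// 'fixed' writeback post-increments the base address by the constant
// access size and needs no index register, while 'register' writeback adds
// a general-purpose register. The code below therefore swaps a fixed-form
// opcode for its register form whenever the increment is not a constant.)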
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode())) + if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so + // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) || + if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) || !isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); } @@ -1796,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0 + // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode())) + if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. @@ -2810,10 +2826,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD, - ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; - unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD, - ARM::VLD2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed, + ARM::VLD2d16PseudoWB_fixed, + ARM::VLD2d32PseudoWB_fixed, + ARM::VLD1q64PseudoWB_fixed}; + unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, + ARM::VLD2q16PseudoWB_fixed, + ARM::VLD2q32PseudoWB_fixed }; return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); } @@ -2876,16 +2895,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, - ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed}; - unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, - ARM::VST2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed, + ARM::VST2d16PseudoWB_fixed, + ARM::VST2d32PseudoWB_fixed, + ARM::VST1q64PseudoWB_fixed}; + unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, + ARM::VST2q16PseudoWB_fixed, + ARM::VST2q32PseudoWB_fixed }; return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); } case ARMISD::VST3_UPD: { unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD, - ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD }; + ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, ARM::VST3q16Pseudo_UPD, ARM::VST3q32Pseudo_UPD }; @@ -2897,7 +2919,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VST4_UPD: { unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, - ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD }; + ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, ARM::VST4q16Pseudo_UPD, ARM::VST4q32Pseudo_UPD }; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8c4c06f..c6c1f5b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -72,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for 
debugging only)"), cl::init(true)); -namespace llvm { +namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, @@ -432,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else addRegisterClass(MVT::i32, ARM::GPRRegisterClass); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, ARM::DPRRegisterClass); @@ -467,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. + // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively + // supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + // FIXME: Code duplication: FDIV and FREM are expanded always, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); + // FIXME: Create unittest. + // In another words, find a way when "copysign" appears in DAG with vector + // operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + // FIXME: Code duplication: SETCC has custom operation action, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); + // FIXME: Create unittest for FNEG and for FABS. setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); @@ -486,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + setOperationAction(ISD::FSIN, MVT::v4f32, Expand); + setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); + setOperationAction(ISD::FPOW, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -586,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); + // These just redirect to CTTZ and CTLZ on ARM. + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + // Only ARMv6 has BSWAP. 
if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -674,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); @@ -712,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } @@ -723,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FMA, MVT::f32, Expand); // Various VFP goodness - if (!UseSoftFloat && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { // int <-> fp are custom expanded into bit_convert + ARMISD ops. if (Subtarget->hasVFP2()) { setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -751,7 +780,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(ARM::SP); - if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || + !Subtarget->hasVFP2()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); @@ -1092,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, if (!Subtarget->isAAPCS_ABI()) return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); else if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard && !isVarArg) + getTargetMachine().Options.FloatABIType == FloatABI::Hard && + !isVarArg) return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } @@ -2951,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - if (UnsafeFPMath && + if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { SDValue Result = OptimizeVFPBrcond(Op, DAG); @@ -3978,9 +4009,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. - if (VT == MVT::v2f32 || VT == MVT::v4f32) { - ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0)); - int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF()); + if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { + int ImmVal = ARM_AM::getFP32Imm(SplatBits); if (ImmVal != -1) { SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); @@ -6010,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // executed. 
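// (Another recurring cleanup in this commit: instruction-property queries
// move from the static opcode description, as in MI->getDesc().isCall(),
// to MachineInstr forwarders such as MI->isCall(), MI->mayStore(), and
// MI->isPredicable().)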
for (MachineBasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { - if (!II->getDesc().isCall()) continue; + if (!II->isCall()) continue; DenseMap<unsigned, bool> DefRegs; for (MachineInstr::mop_iterator @@ -6421,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - const MCInstrDesc *MCID = &MI->getDesc(); - if (!MCID->hasPostISelHook()) { + if (!MI->hasPostISelHook()) { assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); return; } + const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's @@ -6454,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. - if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { + if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } @@ -7948,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return -0, so vmin can only be used for unsafe math or if one of // the operands is known to be nonzero. if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; @@ -7970,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return +0, so vmax can only be used for unsafe math or if one of // the operands is known to be nonzero. if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? 
ARMISD::FMIN : ARMISD::FMAX; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 6940156..80f3773 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -201,21 +201,29 @@ def msr_mask : Operand<i32> { // 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> // 64 64 - <imm> is encoded in imm6<5:0> +def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; } def shr_imm8 : Operand<i32> { let EncoderMethod = "getShiftRight8Imm"; let DecoderMethod = "DecodeShiftRight8Imm"; + let ParserMatchClass = shr_imm8_asm_operand; } +def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; } def shr_imm16 : Operand<i32> { let EncoderMethod = "getShiftRight16Imm"; let DecoderMethod = "DecodeShiftRight16Imm"; + let ParserMatchClass = shr_imm16_asm_operand; } +def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; } def shr_imm32 : Operand<i32> { let EncoderMethod = "getShiftRight32Imm"; let DecoderMethod = "DecodeShiftRight32Imm"; + let ParserMatchClass = shr_imm32_asm_operand; } +def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; } def shr_imm64 : Operand<i32> { let EncoderMethod = "getShiftRight64Imm"; let DecoderMethod = "DecodeShiftRight64Imm"; + let ParserMatchClass = shr_imm64_asm_operand; } //===----------------------------------------------------------------------===// @@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; +class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>; + + +class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasVFP2]>; +class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasNEON]>; //===----------------------------------------------------------------------===// // ARM Instruction templates. @@ -1994,73 +2010,111 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { // VFP/NEON Instruction aliases for type suffices. 
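// (The per-suffix multiclass explosion below shrinks considerably because
// the assembler now canonicalizes data-type tokens through the TokenAlias
// defs added at the end of this file (.s8/.u8 fold to .i8, .i8 to .8, and
// so on, per Table A7-3 of the ARM ARM), so one alias per canonical suffix
// is enough.)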
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> : - InstAlias<!strconcat(opc, dt, asm), Result>; -multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> { - def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>; - def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>; - def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>; - def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>; -} -// VFPDT8ReqInstAlias plus plain ".8" -multiclass VFPDT8InstAlias<string opc, string asm, dag Result> { - def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>; - defm : VFPDT8ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> { - def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>; - def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>; - def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>; - def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>; -} -// VFPDT16ReqInstAlias plus plain ".16" -multiclass VFPDT16InstAlias<string opc, string asm, dag Result> { - def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>; - defm : VFPDT16ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> { - def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>; - def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>; - def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>; - def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>; - def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>; -} -// VFPDT32ReqInstAlias plus plain ".32" -multiclass VFPDT32InstAlias<string opc, string asm, dag Result> { - def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>; - defm : VFPDT32ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> { - def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>; - def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; - def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; - def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>; - def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>; -} -// VFPDT64ReqInstAlias plus plain ".64" -multiclass VFPDT64InstAlias<string opc, string asm, dag Result> { - def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; - defm : VFPDT64ReqInstAlias<opc, asm, Result>; -} -multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> { - def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>; - def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>; - def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>; - def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>; -} -// VFPDT64ReqInstAlias plus plain ".64" -multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> { - def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>; - defm : VFPDT64ReqInstAlias<opc, asm, Result>; -} + InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>; + multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { - defm : VFPDT8InstAlias<opc, asm, Result>; - defm : VFPDT16InstAlias<opc, asm, Result>; - defm : VFPDT32InstAlias<opc, asm, Result>; - defm : VFPDT64InstAlias<opc, asm, Result>; -} -multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> { - defm : VFPDT8InstAlias<opc, asm, Result>; - defm : VFPDT16InstAlias<opc, asm, Result>; - defm : VFPDT32InstAlias<opc, asm, Result>; - defm : VFPDT64NoF64InstAlias<opc, asm, Result>; -} + def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + def : VFPDataTypeInstAlias<opc, 
".16", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +} + +// The same alias classes using AsmPseudo instead, for the more complex +// stuff in NEON that InstAlias can't quite handle. +// Note that we can't use anonymous defm references here like we can +// above, as we care about the ultimate instruction enum names generated, unlike +// for instalias defs. +class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> : + AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>; +multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>; + def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>; + def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>; + def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>; +} +// NEONDT8ReqAsmPseudoInst plus plain ".8" +multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> { + def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>; + defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>; + def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>; + def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>; + def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>; +} +// NEONDT16ReqAsmPseudoInst plus plain ".16" +multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> { + def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>; + defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>; + def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>; + def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>; + def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>; + def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>; +} +// NEONDT32ReqAsmPseudoInst plus plain ".32" +multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> { + def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>; + defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; + def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; + def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; + def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>; + def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; +} +// NEONDT64ReqAsmPseudoInst plus plain ".64" +multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> { + def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; + defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> { + def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>; + def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>; + def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>; + def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>; +} +// NEONDT64ReqAsmPseudoInst plus plain ".64" +multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> { + def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>; + defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDTAnyAsmPseudoInst<string 
opc, string asm, dag iops> { + defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>; +} +multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> { + defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>; + defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>; +} + +// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. +def : TokenAlias<".s8", ".i8">; +def : TokenAlias<".u8", ".i8">; +def : TokenAlias<".s16", ".i16">; +def : TokenAlias<".u16", ".i16">; +def : TokenAlias<".s32", ".i32">; +def : TokenAlias<".u32", ".i32">; +def : TokenAlias<".s64", ".i64">; +def : TokenAlias<".u64", ".i64">; + +def : TokenAlias<".i8", ".8">; +def : TokenAlias<".i16", ".16">; +def : TokenAlias<".i32", ".32">; +def : TokenAlias<".i64", ".64">; + +def : TokenAlias<".p8", ".8">; +def : TokenAlias<".p16", ".16">; + +def : TokenAlias<".f32", ".32">; +def : TokenAlias<".f64", ".64">; +def : TokenAlias<".f", ".f32">; +def : TokenAlias<".d", ".f64">; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index be03924..516a080 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -238,27 +238,23 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; -/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. -def imm1_15 : ImmLeaf<i32, [{ - return (int32_t)Imm >= 1 && (int32_t)Imm < 16; -}]>; - /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. def imm16_31 : ImmLeaf<i32, [{ return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; -def so_imm_neg : - PatLeaf<(imm), [{ +def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } +def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; - }], so_imm_neg_XFORM>; + }], so_imm_neg_XFORM> { + let ParserMatchClass = so_imm_neg_asmoperand; +} // Note: this pattern doesn't require an encoder method and such, as it's // only used on aliases (Pat<> and InstAlias<>). The actual encoding -// is handled by the destination instructions, which use t2_so_imm. +// is handled by the destination instructions, which use so_imm. def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } -def so_imm_not : - Operand<i32>, PatLeaf<(imm), [{ +def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; }], so_imm_not_XFORM> { let ParserMatchClass = so_imm_not_asmoperand; @@ -512,6 +508,14 @@ def arm_i32imm : PatLeaf<(imm), [{ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; +/// imm0_1 predicate - Immediate in the range [0,1]. +def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; } +def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } + +/// imm0_3 predicate - Immediate in the range [0,3]. +def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } +def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } + /// imm0_7 predicate - Immediate in the range [0,7]. 
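// (Each immediate operand in this series pairs an ImmLeaf predicate, used
// when matching isel patterns, with an ImmAsmOperand ParserMatchClass,
// used by the assembly parser to range-check what was written; wiring up
// the match classes is what lets the parser reject, say, an out-of-range
// shift amount instead of silently mis-encoding it.)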
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ @@ -520,6 +524,42 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_7AsmOperand; } +/// imm8 predicate - Immediate is exactly 8. +def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; } +def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> { + let ParserMatchClass = Imm8AsmOperand; +} + +/// imm16 predicate - Immediate is exactly 16. +def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; } +def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> { + let ParserMatchClass = Imm16AsmOperand; +} + +/// imm32 predicate - Immediate is exactly 32. +def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; } +def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { + let ParserMatchClass = Imm32AsmOperand; +} + +/// imm1_7 predicate - Immediate in the range [1,7]. +def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } +def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { + let ParserMatchClass = Imm1_7AsmOperand; +} + +/// imm1_15 predicate - Immediate in the range [1,15]. +def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; } +def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> { + let ParserMatchClass = Imm1_15AsmOperand; +} + +/// imm1_31 predicate - Immediate in the range [1,31]. +def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; } +def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { + let ParserMatchClass = Imm1_31AsmOperand; +} + /// imm0_15 predicate - Immediate in the range [0,15]. def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ @@ -544,6 +584,14 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ let ParserMatchClass = Imm0_32AsmOperand; } +/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. +def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; } +def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 64; +}]> { + let ParserMatchClass = Imm0_63AsmOperand; +} + /// imm0_255 predicate - Immediate in the range [0,255]. def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { @@ -812,6 +860,9 @@ def addrmode6dup : Operand<i32>, let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; + // FIXME: This is close, but not quite right. The alignment specifier is + // different. + let ParserMatchClass = AddrMode6AsmOperand; } // addrmodepc := pc + reg @@ -2753,23 +2804,25 @@ defm STRHT : AI3strT<0b1011, "strht">; // Load / store multiple Instructions. // -multiclass arm_ldst_mult<string asm, bit L_bit, Format f, +multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f, InstrItinClass itin, InstrItinClass itin_upd> { // IA is the default, so no need for an explicit suffix on the // mnemonic here. The form without the suffix is the canonical spelling.
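// (Aside, not part of the patch: of the two new parameters, sfx is appended
// verbatim to the asm string, so passing " ^" yields forms such as
// "ldmdb r0, {r4-r6} ^", and P_bit is wired to Inst{22}, the bit that
// selects the user-registers / exception-return variants defined below as
// sysLDM and sysSTM.)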
def IA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2778,16 +2831,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2796,16 +2851,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2814,16 +2871,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def IB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2834,10 +2893,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>; +defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>; +defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; } // 
neverHasSideEffects @@ -2851,6 +2912,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>, RegConstraint<"$Rn = $wb">; +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; + + + //===----------------------------------------------------------------------===// // Move Instructions. // @@ -4999,6 +5070,32 @@ def : MnemonicAlias<"usubaddx", "usax">; // for isel. def : ARMInstAlias<"mov${s}${p} $Rd, $imm", (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", + (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +// Same for AND <--> BIC +def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", + (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", + (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rdn, $imm", + (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; + +// Likewise, "add Rd, so_imm_neg" -> sub +def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", + (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"add${s}${p} $Rd, $imm", + (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via so_imm_neg +def : ARMInstAlias<"cmp${p} $Rd, $imm", + (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; +def : ARMInstAlias<"cmn${p} $Rd, $imm", + (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; // The shifter forms of the MOV instruction are aliased to the ASR, LSL, // LSR, ROR, and RRX instructions. @@ -5056,4 +5153,8 @@ def : ARMInstAlias<"ror${s}${p} $Rn, $Rm", // 'mul' instruction can be specified with only two operands. def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", - (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; + (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>; + +// "neg" is an alias for "rsb rd, rn, #0" +def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", + (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f2ca963..c40860d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -74,9 +74,11 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ let MIOperandInfo = (ops i32imm); } +// Register list of one D register.
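// (Aside, not part of the patch, an assumed walk-through of how these
// operand classes cooperate: ParserMethod points the parser at
// ARMAsmParser::parseVectorList(), which consumes syntax like "{d0}" or
// "{d0, d1}" into a vector-list operand; the matcher then classifies that
// operand via the AsmOperandClass Name; and RenderMethod names the operand
// member, addVecListOperands(), that appends the MCOperands to the MCInst.)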
def VecListOneDAsmOperand : AsmOperandClass { let Name = "VecListOneD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { let ParserMatchClass = VecListOneDAsmOperand; @@ -85,6 +87,7 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { def VecListTwoDAsmOperand : AsmOperandClass { let Name = "VecListTwoD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { let ParserMatchClass = VecListTwoDAsmOperand; @@ -93,6 +96,7 @@ def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { def VecListThreeDAsmOperand : AsmOperandClass { let Name = "VecListThreeD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { let ParserMatchClass = VecListThreeDAsmOperand; @@ -101,6 +105,7 @@ def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { def VecListFourDAsmOperand : AsmOperandClass { let Name = "VecListFourD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { let ParserMatchClass = VecListFourDAsmOperand; @@ -109,11 +114,92 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { def VecListTwoQAsmOperand : AsmOperandClass { let Name = "VecListTwoQ"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } -def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> { +def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> { let ParserMatchClass = VecListTwoQAsmOperand; } +// Register list of one D register, with "all lanes" subscripting. +def VecListOneDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListOneDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { + let ParserMatchClass = VecListOneDAllLanesAsmOperand; +} +// Register list of two D registers, with "all lanes" subscripting. +def VecListTwoDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListTwoDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { + let ParserMatchClass = VecListTwoDAllLanesAsmOperand; +} + +// Register list of one D register, with byte lane subscripting. +def VecListOneDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListOneDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
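// (Aside, not part of the patch: unlike the plain lists above, these indexed
// forms are Operand<i32> wrappers whose MIOperandInfo splits one asm-level
// operand into two MI operands; "{d1[2]}" becomes Vd = D1 plus an i32imm
// lane index of 2, with addVecListIndexedOperands() assumed to render both.)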
+def VecListOneDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of two D registers, with byte lane subscripting. +def VecListTwoDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListTwoDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// @@ -272,12 +358,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, rGPR:$offset), itin, "$addr.addr = $wb">; + class VLDQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + + class VLDQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, "$src = $dst">; @@ -462,31 +559,23 @@ defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> +class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), - (ins addrmode6:$Rn), IIC_VLD2, - "vld2", Dt, "$Vd, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs VdTy:$Vd), - (ins addrmode6:$Rn), IIC_VLD2x2, + (ins addrmode6:$Rn), itin, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>; -def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", 
VecListTwoD>; -def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; -def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>; -def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>; -def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; @@ -497,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: -class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, - "vld2", Dt, "$Vd, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, - "vld2", Dt, "$Vd, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy, InstrItinClass itin> { + def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn), itin, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
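// (Aside, not part of the patch, on the writeback convention used here, per
// the ARM ARM encoding of element/structure loads and stores: Rm == 0b1111
// encodes no writeback, Rm == 0b1101 encodes post-increment by the transfer
// size (the "_fixed" form here), and any other Rm is a register post-index,
// which is what the "_register" variant below captures.)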
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), itin, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; -def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; -def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; -def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>; -def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>; -def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>; +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; +def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; -def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; // ...with double-spaced registers -def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>; -def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; -def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; -def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -997,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn), - IIC_VLD1dup, "vld1", 
Dt, "\\{$Vd[]\\}, $Rn", "", - [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), + (ins addrmode6dup:$Rn), + IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", + [(set VecListOneDAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1025,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2), + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1038,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; // ...with address register writeback: -class VLD1DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -class VLD1QDUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">; -def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">; -def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; -def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">; -def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">; -def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP<bits<4> op7_4, string Dt> @@ -1329,94 +1460,109 @@ def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; // ...with 3 registers class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + (ins addrmode6:$Rn, VecListThreeD:$Vd), + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D3WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8">; +def VST1d16T : VST1D3<{0,1,0,?}, "16">; +def VST1d32T : VST1D3<{1,0,0,?}, "32">; +def VST1d64T : VST1D3<{1,1,0,?}, "64">; -def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">; -def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">; -def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">; -def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; -def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; +def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; // ...with 4 registers class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", + (ins addrmode6:$Rn, VecListFourD:$Vd), + IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D4WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u, - "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D4WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8">; +def VST1d16Q : VST1D4<{0,1,?,?}, "16">; +def VST1d32Q : VST1D4<{1,0,?,?}, "32">; +def VST1d64Q : VST1D4<{1,1,?,?}, "64">; -def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">; -def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">; -def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">; -def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; -def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; +def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), - IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; -} -class VST2Q<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { +class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; -def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">; -def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; -def VST2q8 : VST2Q<{0,0,?,?}, "8">; -def VST2q16 : VST2Q<{0,1,?,?}, "16">; -def VST2q32 : VST2Q<{1,0,?,?}, "32">; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; @@ -1427,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: -class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, + 
RegisterOperand VdTy> { + def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u, - "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; -def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">; -def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; -def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; -def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; -def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; -def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; // ...with double-spaced registers -def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">; -def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">; -def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">; -def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">; -def 
VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">; -def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1741,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; // ...with address register writeback: class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, - "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []> { + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, + "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; } @@ -2573,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Long shift by immediate. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm, + (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2805,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v4i32, v4i32, OpNode, Commutable>; } -multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"), - v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"), +multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", v4i32, v2i32, ShOp>; } @@ -3477,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, SDNode OpNode> { def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> { + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - 
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3574,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; @@ -4285,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, - ValueType OpTy, SDNode OpNode> + ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, - ResTy, OpTy, OpNode> { + ResTy, OpTy, ImmTy, OpNode> { let Inst{21-16} = op21_16; let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", - v8i16, v8i8, NEONvshlli>; + v8i16, v8i8, imm8, NEONvshlli>; def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", - v4i32, v4i16, NEONvshlli>; + v4i32, v4i16, imm16, NEONvshlli>; def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", - v2i64, v2i32, NEONvshlli>; + v2i64, v2i32, imm32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", @@ -4469,10 +4641,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; def : InstAlias<"vmov${p} $Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; -defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -4932,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; // VEXT : Vector Extract -class VEXTd<string OpcodeStr, string Dt, ValueType Ty> +class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm, + (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), - (Ty DPR:$Vm), imm:$index)))]> { + (Ty DPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -class VEXTq<string OpcodeStr, string Dt, ValueType Ty> +class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), - (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm, + (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm, IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), - (Ty QPR:$Vm), imm:$index)))]> { + (Ty QPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -def VEXTd8 : VEXTd<"vext", "8", v8i8> { +def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { let Inst{11-8} = index{3-0}; } -def VEXTd16 : VEXTd<"vext", "16", v4i16> { +def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTd32 :
VEXTd<"vext", "32", v2i32, imm0_1> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } @@ -4968,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (i32 imm:$index))), (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; -def VEXTq8 : VEXTq<"vext", "8", v16i8> { +def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { let Inst{11-8} = index{3-0}; } -def VEXTq16 : VEXTq<"vext", "16", v8i16> { +def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTq32 : VEXTq<"vext", "32", v4i32> { +def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } +def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { + let Inst{11} = index{0}; + let Inst{10-8} = 0b000; +} def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))), @@ -5026,17 +5198,17 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>; + (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>; + (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), + (ins VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>; + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 def VTBL2Pseudo @@ -5056,18 +5228,18 @@ def VTBX1 let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>; + (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), + (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 @@ -5207,11 +5379,83 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; // Assembler aliases // -// VAND/VEOR/VORR accept but do not require a type suffix. +def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; +def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; + + +// VADD two-operand aliases. 
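// (Aside, not part of the patch: these aliases use the InstAlias trick of
// naming $Vdn for both the destination and the first source, which ties the
// two operands together, so "vadd.i16 d0, d1" assembles exactly as
// "vadd.i16 d0, d0, d1".)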
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSUB two-operand aliases. +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VADDW two-operand aliases. +def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", + (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", + (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", + (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", + (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", + (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", + (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; + +// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
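// (Aside, not part of the patch: each defm below expands to one alias per
// data-type token plus the bare mnemonic, so "vand d0, d1, d2",
// "vand.8 d0, d1, d2", "vand.i8 ...", "vand.s8 ..." and so on all map onto
// the single VANDd/VANDq instruction; the exact suffix set is whatever
// VFPDTAnyInstAlias enumerates.)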
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", @@ -5220,245 +5464,450 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; - -// VLD1 requires a size suffix, but also accepts type specific variants. -// Load one D register. -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; - -// Load two D registers. 
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, - rGPR:$Rm, pred:$p)>; - -// Load three D registers. -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; - - -// Load four D registers. 
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", - (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", - (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", - (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg, - addrmode6:$Rn, rGPR:$Rm, pred:$p)>; - -// VST1 requires a size suffix, but also accepts type specific variants. -// Store one D register. -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, - VecListOneD:$Vd, pred:$p)>; - -// Store two D registers. 
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", - (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -// with writeback, fixed stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", - (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; -// with writeback, register stride -defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q8wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q16wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q32wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; -defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", - (VST1q64wb_register zero_reg, addrmode6:$Rn, - rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; - -// FIXME: The three and four register VST1 instructions haven't been moved -// to the VecList* encoding yet, so we can't do assembly parsing support -// for them. Uncomment these when that happens. -// Load three D registers. -//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; -//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; - -// Load four D registers. -//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", -// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; - - -// VTRN instructions data type suffix aliases for more-specific types. -defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm", - (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm", - (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm", - (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; - -defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm", - (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>; -defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm", - (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>; -defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm", - (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>; +// ... 
two-operand aliases +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VMUL two-operand aliases. +def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", + (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", + (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", + (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", + (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", + (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", + (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", + (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", + (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", + (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", + (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", + (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", + (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +// VQADD (register) two-operand aliases. 
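+// These accept "vqadd.s8 d0, d1" and expand it to the full three-operand +// form, "vqadd.s8 d0, d0, d1", just like the logical-op aliases above.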
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; + +// VSHL (register) two-operand aliases. 
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +// VLD1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
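+// Same idea as the VLD1/VST1 single-lane pseudos above, e.g. +// "vld2.16 {d0[1], d1[1]}, [r3]".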
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VMOV takes an optional datatype suffix +defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; + +// VCLE (register) is an assembler alias for VCGE w/ the operands reversed. +// D-register versions.
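+// e.g. "vcle.s8 d0, d1, d2" is encoded as "vcge.s8 d0, d2, d1".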
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", + (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", + (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", + (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", + (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", + (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", + (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", + (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", + (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", + (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", + (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", + (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", + (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", + (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", + (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. +// D-register versions. +def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", + (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", + (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", + (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", + (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", + (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", + (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", + (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. 
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", + (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", + (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", + (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", + (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", + (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", + (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", + (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// Two-operand variants for VEXT +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; + +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", + (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; + +// Two-operand variants for VQDMULH +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, +// these should restrict to just the Q register variants, but the register +// classes are enough to match correctly regardless, so we keep it simple +// and just use MnemonicAlias. +def : NEONMnemonicAlias<"vbicq", "vbic">; +def : NEONMnemonicAlias<"vandq", "vand">; +def : NEONMnemonicAlias<"veorq", "veor">; +def : NEONMnemonicAlias<"vorrq", "vorr">; + +def : NEONMnemonicAlias<"vmovq", "vmov">; +def : NEONMnemonicAlias<"vmvnq", "vmvn">; +// Explicit versions for floating point so that the FPImm variants get +// handled early. The parser gets confused otherwise. 
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; +def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; + +def : NEONMnemonicAlias<"vaddq", "vadd">; +def : NEONMnemonicAlias<"vsubq", "vsub">; + +def : NEONMnemonicAlias<"vminq", "vmin">; +def : NEONMnemonicAlias<"vmaxq", "vmax">; + +def : NEONMnemonicAlias<"vmulq", "vmul">; + +def : NEONMnemonicAlias<"vabsq", "vabs">; + +def : NEONMnemonicAlias<"vshlq", "vshl">; +def : NEONMnemonicAlias<"vshrq", "vshr">; + +def : NEONMnemonicAlias<"vcvtq", "vcvt">; + +def : NEONMnemonicAlias<"vcleq", "vcle">; +def : NEONMnemonicAlias<"vceqq", "vceq">; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index c6cc98d..ac1a229 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1131,9 +1131,6 @@ def tRSB : // A8.6.141 "rsb", "\t$Rd, $Rn, #0", [(set tGPR:$Rd, (ineg tGPR:$Rn))]>; -def : tInstAlias<"neg${s}${p} $Rd, $Rm", - (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; - // Subtract with carry register let Uses = [CPSR] in def tSBC : // A8.6.151 @@ -1435,3 +1432,8 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>; // nothing). def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; + +// "neg" is and alias for "rsb rd, rn, #0" +def : tInstAlias<"neg${s}${p} $Rd, $Rm", + (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 6129fa3..981592c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -80,18 +80,19 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ // only used on aliases (Pat<> and InstAlias<>). The actual encoding // is handled by the destination instructions, which use t2_so_imm. def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; } -def t2_so_imm_not : Operand<i32>, - PatLeaf<(imm), [{ +def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_not_XFORM> { let ParserMatchClass = t2_so_imm_not_asmoperand; } // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. -def t2_so_imm_neg : Operand<i32>, - PatLeaf<(imm), [{ +def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } +def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; -}], t2_so_imm_neg_XFORM>; +}], t2_so_imm_neg_XFORM> { + let ParserMatchClass = t2_so_imm_neg_asmoperand; +} /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. 
def imm0_4095 : Operand<i32>, @@ -1333,7 +1334,7 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), let mayStore = 1, neverHasSideEffects = 1 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins GPRnopc:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { @@ -1357,13 +1358,13 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), } // mayStore = 1, neverHasSideEffects = 1 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, addr_offset_none:$Rn, + (ins GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStore_iu, "str", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, - (post_store rGPR:$Rt, addr_offset_none:$Rn, + (post_store GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset))]>; def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), @@ -3971,6 +3972,18 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +// STMIA/STMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"stm${p} $Rn, $regs", + (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"stm${p} $Rn!, $regs", + (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + +// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"ldm${p} $Rn, $regs", + (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"ldm${p} $Rn!, $regs", + (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + // STMDB/STMDB_UPD aliases w/ the optional .w suffix def : t2InstAlias<"stmdb${p}.w $Rn, $regs", (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>; @@ -4084,8 +4097,50 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", // for isel. def : t2InstAlias<"mov${p} $Rd, $imm", (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +def : t2InstAlias<"mvn${p} $Rd, $imm", + (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +// Same for AND <--> BIC +def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"bic${s}${p} $Rdn, $imm", + (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", + (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rdn, $imm", + (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +// Likewise, "add Rd, t2_so_imm_neg" -> sub +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rd, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via t2_so_imm_neg +def : t2InstAlias<"cmp${p} $Rd, $imm", + (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rd, $imm", + (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; // Wide 'mul' encoding can be specified with only two operands. 
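+// e.g. "mul r4, r5" is accepted and encoded as "mul r4, r5, r4"; since +// multiplication is commutative, the result is unchanged.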
def : t2InstAlias<"mul${p} $Rn, $Rm", - (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>; + (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>; + +// "neg" is and alias for "rsb rd, rn, #0" +def : t2InstAlias<"neg${s}${p} $Rd, $Rm", + (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>; + +// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for +// these, unfortunately. +def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; + +// ADR w/o the .w suffix +def : t2InstAlias<"adr${p} $Rd, $addr", + (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e420135..5d43556 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1160,18 +1160,64 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // +// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// support them all, but supporting at least some of the basics is +// good to be friendly. +def : VFP2MnemonicAlias<"flds", "vldr">; +def : VFP2MnemonicAlias<"fldd", "vldr">; +def : VFP2MnemonicAlias<"fmrs", "vmov">; +def : VFP2MnemonicAlias<"fmsr", "vmov">; +def : VFP2MnemonicAlias<"fsqrts", "vsqrt">; +def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">; +def : VFP2MnemonicAlias<"fadds", "vadd.f32">; +def : VFP2MnemonicAlias<"faddd", "vadd.f64">; +def : VFP2MnemonicAlias<"fmrdd", "vmov">; +def : VFP2MnemonicAlias<"fmrds", "vmov">; +def : VFP2MnemonicAlias<"fmrrd", "vmov">; +def : VFP2MnemonicAlias<"fmdrr", "vmov">; +def : VFP2MnemonicAlias<"fmuld", "vmul.f64">; +def : VFP2MnemonicAlias<"fnegs", "vneg.f32">; +def : VFP2MnemonicAlias<"fnegd", "vneg.f64">; +def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">; +def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">; +def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">; +def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">; +def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">; +def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">; +def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">; +def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">; +def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">; +def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">; +def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">; +def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">; +def : VFP2MnemonicAlias<"fsts", "vstr">; +def : VFP2MnemonicAlias<"fstd", "vstr">; +def : VFP2MnemonicAlias<"fmacd", "vmla.f64">; +def : VFP2MnemonicAlias<"fmacs", "vmla.f32">; def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; +def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", + (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", + (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; + +// No need for the size suffix on VSQRT. It's implied by the register classes. +def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; // VLDR/VSTR accept an optional type suffix. 
-defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr", - (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr", - (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr", - (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr", - (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr", + (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr", + (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr", + (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr", + (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; // VMUL has a two-operand form (implied destination operand) def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c8728f4..6712fb6 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -33,6 +33,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -1471,19 +1472,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects()) + if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && MCID.mayStore()) + if (isLd && I->mayStore()) return false; if (!isLd) { - if (MCID.mayLoad()) + if (I->mayLoad()) return false; // It's not safe to move the first 'str' down. // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (MCID.mayStore()) + if (I->mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { @@ -1773,8 +1773,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { while (MBBI != E) { for (; MBBI != E; ++MBBI) { MachineInstr *MI = MBBI; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isCall() || MCID.isTerminator()) { + if (MI->isCall() || MI->isTerminator()) { // Stop at barriers. 
++MBBI; break; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6cbb24b..61b75cb 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -38,22 +38,25 @@ extern "C" void LLVMInitializeARMTarget() { /// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Soft; + if (Options.FloatABIType == FloatABI::Default) + this->Options.FloatABIType = FloatABI::Soft; } ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget), + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32-S32") : @@ -73,9 +76,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), @@ -143,10 +147,16 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { } bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { - if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) - PM.add(createThumb2SizeReductionPass()); + if (Subtarget.isThumb2()) { + if (!Subtarget.prefers32BitThumb()) + PM.add(createThumb2SizeReductionPass()); + + // The constant island pass works on unbundled instructions.
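+ // UnpackMachineBundles rewrites any instruction bundles back into + // individual instructions before the island pass runs.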
+ PM.add(createUnpackMachineBundlesPass()); + } PM.add(createARMConstantIslandPass()); + return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index a1f517b..cd77822 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -41,6 +41,7 @@ private: public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); @@ -71,6 +72,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); @@ -112,6 +114,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 19defa1..721a225 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -36,6 +36,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); + StructorOutputOrder = Structors::PriorityOrder; LSDASection = NULL; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bb83e5e..cd86065 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -39,10 +39,15 @@ namespace { class ARMOperand; +enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; + class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + // Map of register aliases registered via the .req directive. + StringMap<unsigned> RegisterReqs; + struct { ARMCC::CondCodes Cond; // Condition for IT block. unsigned Mask:4; // Condition mask for instructions.
@@ -90,9 +95,12 @@ class ARMAsmParser : public MCTargetAsmParser { unsigned &ShiftAmount); bool parseDirectiveWord(unsigned Size, SMLoc L); bool parseDirectiveThumb(SMLoc L); + bool parseDirectiveARM(SMLoc L); bool parseDirectiveThumbFunc(SMLoc L); bool parseDirectiveCode(SMLoc L); bool parseDirectiveSyntax(SMLoc L); + bool parseDirectiveReq(StringRef Name, SMLoc L); + bool parseDirectiveUnreq(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -161,6 +169,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, @@ -271,6 +280,8 @@ class ARMOperand : public MCParsedAsmOperand { k_DPRRegisterList, k_SPRRegisterList, k_VectorList, + k_VectorListAllLanes, + k_VectorListIndexed, k_ShiftedRegister, k_ShiftedImmediate, k_ShifterImmediate, @@ -324,6 +335,8 @@ class ARMOperand : public MCParsedAsmOperand { struct { unsigned RegNum; unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; } VectorList; struct { @@ -409,6 +422,8 @@ public: Registers = o.Registers; break; case k_VectorList: + case k_VectorListAllLanes: + case k_VectorListIndexed: VectorList = o.VectorList; break; case k_CoprocNum: @@ -562,6 +577,22 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } + bool isImm0_1() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 2; + } + bool isImm0_3() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4; + } bool isImm0_7() const { if (Kind != k_Immediate) return false; @@ -586,6 +617,94 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } + bool isImm0_63() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 64; + } + bool isImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 8; + } + bool isImm16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 16; + } + bool isImm32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 32; + } + bool isShrImm8() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 8; + } + bool isShrImm16() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) 
return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 16; + } + bool isShrImm32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 32; + } + bool isShrImm64() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 64; + } + bool isImm1_7() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 8; + } + bool isImm1_15() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 16; + } + bool isImm1_31() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 32; + } bool isImm1_16() const { if (Kind != k_Immediate) return false; @@ -676,6 +795,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(~Value) != -1; } + bool isARMSOImmNeg() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(-Value) != -1; + } bool isT2SOImm() const { if (Kind != k_Immediate) return false; @@ -692,6 +819,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(~Value) != -1; } + bool isT2SOImmNeg() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(-Value) != -1; + } bool isSetEndImm() const { if (Kind != k_Immediate) return false; @@ -892,9 +1027,9 @@ public: if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [-255, -1]. - if (!Memory.OffsetImm) return true; + if (!Memory.OffsetImm) return false; int64_t Val = Memory.OffsetImm->getValue(); - return Val > -256 && Val < 0; + return (Val == INT32_MIN) || (Val > -256 && Val < 0); } bool isMemUImm12Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -940,31 +1075,75 @@ public: bool isProcIFlags() const { return Kind == k_ProcIFlags; } // NEON operands. 
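+ // A single-spaced register list uses consecutive D registers, e.g. + // {d0, d1, d2}; a double-spaced list uses every other one, e.g. {d0, d2, d4}.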
+ bool isSingleSpacedVectorList() const { + return Kind == k_VectorList && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorList() const { + return Kind == k_VectorList && VectorList.isDoubleSpaced; + } bool isVecListOneD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 1; } bool isVecListTwoD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 2; } bool isVecListThreeD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 3; } bool isVecListFourD() const { - if (Kind != k_VectorList) return false; + if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 4; } bool isVecListTwoQ() const { - if (Kind != k_VectorList) return false; - //FIXME: We haven't taught the parser to handle by-two register lists - // yet, so don't pretend to know one. - return VectorList.Count == 2 && false; + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 2; + } + + bool isVecListOneDAllLanes() const { + if (Kind != k_VectorListAllLanes) return false; + return VectorList.Count == 1; + } + + bool isVecListTwoDAllLanes() const { + if (Kind != k_VectorListAllLanes) return false; + return VectorList.Count == 2; + } + + bool isVecListOneDByteIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 7; + } + + bool isVecListOneDHWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 3; + } + + bool isVecListOneDWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 1; + } + + bool isVecListTwoDByteIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 7; + } + + bool isVecListTwoDHWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 3; + } + + bool isVecListTwoDWordIndexed() const { + if (Kind != k_VectorListIndexed) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 1; } bool isVectorIndex8() const { @@ -1233,6 +1412,14 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } + void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a t2_so_imm, but we have its + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -1241,6 +1428,14 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } + void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a so_imm, but we have its + // negation in the assembly source, so twiddle it here. 
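+ // e.g. a "cmp ..., #-2" written in the source that matches a CMN alias + // is emitted here as 2.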
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); @@ -1527,37 +1722,15 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); } - void addVecListOneDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListTwoDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListThreeDOperands(MCInst &Inst, unsigned N) const { + void addVecListOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); } - void addVecListFourDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); - } - - void addVecListTwoQOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // Only the first register actually goes on the instruction. The rest - // are implied by the opcode. + void addVecListIndexedOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex)); } void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { @@ -1780,10 +1953,32 @@ public: } static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count, - SMLoc S, SMLoc E) { + bool isDoubleSpaced, SMLoc S, SMLoc E) { ARMOperand *Op = new ARMOperand(k_VectorList); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, + unsigned Index, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListIndexed); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.LaneIndex = Index; Op->StartLoc = S; Op->EndLoc = E; return Op; @@ -1982,6 +2177,14 @@ void ARMOperand::print(raw_ostream &OS) const { OS << "<vector_list " << VectorList.Count << " * " << VectorList.RegNum << ">"; break; + case k_VectorListAllLanes: + OS << "<vector_list(all lanes) " << VectorList.Count << " * " + << VectorList.RegNum << ">"; + break; + case k_VectorListIndexed: + OS << "<vector_list(lane " << VectorList.LaneIndex << ") " + << VectorList.Count << " * " << VectorList.RegNum << ">"; + break; case k_Token: OS << "'" << getToken() << "'"; break; @@ -2000,7 +2203,9 @@ static unsigned 
MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + StartLoc = Parser.getTok().getLoc(); RegNo = tryParseRegister(); + EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -2013,8 +2218,6 @@ int ARMAsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return -1; - // FIXME: Validate register for the current architecture; we have to do - // validation later, so maybe there is no need for this here. std::string lowerCase = Tok.getString().lower(); unsigned RegNum = MatchRegisterName(lowerCase); if (!RegNum) { @@ -2023,9 +2226,34 @@ int ARMAsmParser::tryParseRegister() { .Case("r14", ARM::LR) .Case("r15", ARM::PC) .Case("ip", ARM::R12) + // Additional register name aliases for 'gas' compatibility. + .Case("a1", ARM::R0) + .Case("a2", ARM::R1) + .Case("a3", ARM::R2) + .Case("a4", ARM::R3) + .Case("v1", ARM::R4) + .Case("v2", ARM::R5) + .Case("v3", ARM::R6) + .Case("v4", ARM::R7) + .Case("v5", ARM::R8) + .Case("v6", ARM::R9) + .Case("v7", ARM::R10) + .Case("v8", ARM::R11) + .Case("sb", ARM::R9) + .Case("sl", ARM::R10) + .Case("fp", ARM::R11) .Default(0); } - if (!RegNum) return -1; + if (!RegNum) { + // Check for aliases registered via .req. + StringMap<unsigned>::const_iterator Entry = + RegisterReqs.find(Tok.getIdentifier()); + // If no match, return failure. + if (Entry == RegisterReqs.end()) + return -1; + Parser.Lex(); // Eat identifier token. + return Entry->getValue(); + } Parser.Lex(); // Eat identifier token. @@ -2045,6 +2273,7 @@ int ARMAsmParser::tryParseShiftRegister( std::string lowerCase = Tok.getString().lower(); ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase) + .Case("asl", ARM_AM::lsl) .Case("lsl", ARM_AM::lsl) .Case("lsr", ARM_AM::lsr) .Case("asr", ARM_AM::asr) @@ -2073,7 +2302,8 @@ int ARMAsmParser::tryParseShiftRegister( ShiftReg = SrcReg; } else { // Figure out if this is shifted by a constant or a register (for non-RRX). - if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; @@ -2446,6 +2676,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the comma. RegLoc = Parser.getTok().getLoc(); int OldReg = Reg; + const AsmToken RegTok = Parser.getTok(); Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); @@ -2459,8 +2690,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. - if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg)) + if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) return Error(RegLoc, "register list not in ascending order"); + if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + continue; + } // VFP register lists must also be contiguous. // It's OK to use the enumeration values directly here, as the // VFP register classes have the enum sorted properly. @@ -2477,13 +2713,55 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(E, "'}' expected"); Parser.Lex(); // Eat '}' token. + // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); + + // The ARM system instruction variants for LDM/STM have a '^' token here. + if (Parser.getTok().is(AsmToken::Caret)) { + Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc())); + Parser.Lex(); // Eat '^' token. + } + return false; } +// Helper function to parse the lane index for vector lists. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { + Index = 0; // Always return a defined index value. + if (Parser.getTok().is(AsmToken::LBrac)) { + Parser.Lex(); // Eat the '['. + if (Parser.getTok().is(AsmToken::RBrac)) { + // "Dn[]" is the 'all lanes' syntax. + LaneKind = AllLanes; + Parser.Lex(); // Eat the ']'. + return MatchOperand_Success; + } + if (Parser.getTok().is(AsmToken::Integer)) { + int64_t Val = Parser.getTok().getIntVal(); + // Make this range check context sensitive for .8, .16, .32. + if (Val < 0 || Val > 7) + Error(Parser.getTok().getLoc(), "lane index out of range"); + Index = Val; + LaneKind = IndexedLane; + Parser.Lex(); // Eat the token. + if (Parser.getTok().isNot(AsmToken::RBrac)) + Error(Parser.getTok().getLoc(), "']' expected"); + Parser.Lex(); // Eat the ']'. + return MatchOperand_Success; + } + Error(Parser.getTok().getLoc(), "lane index must be empty or an integer"); + return MatchOperand_ParseFail; + } + LaneKind = NoLanes; + return MatchOperand_Success; +} + // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + VectorLaneTy LaneKind; + unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); // As an extension (to match gas), support a plain D register or Q register // (without enclosing curly braces) as a single or double entry list, @@ -2494,12 +2772,48 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; SMLoc E = Parser.getTok().getLoc(); if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { - Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E)); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind!"); + case NoLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1, + LaneIndex, S,E)); + break; + } return MatchOperand_Success; } if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E)); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind!"); + case NoLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2, + LaneIndex, S,E)); + break; + } return MatchOperand_Success; } Error(S, "vector register expected"); @@ -2518,18 +2832,30 @@
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } unsigned Count = 1; + int Spacing = 0; unsigned FirstReg = Reg; // The list is of D registers, but we also allow Q regs and just interpret // them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { FirstReg = Reg = getDRegFromQReg(Reg); + Spacing = 1; // double-spacing requires explicit D registers, otherwise + // it's ambiguous with a four-register single-spaced list. ++Reg; ++Count; } + if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; while (Parser.getTok().is(AsmToken::Comma) || Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { + if (!Spacing) + Spacing = 1; // Register range implies a single-spaced list. + else if (Spacing == 2) { + Error(Parser.getTok().getLoc(), + "sequential registers in double-spaced list"); + return MatchOperand_ParseFail; + } Parser.Lex(); // Eat the minus. SMLoc EndLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); @@ -2554,6 +2880,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(EndLoc, "bad range in register list"); return MatchOperand_ParseFail; } + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + EndLoc = Parser.getTok().getLoc(); // Add all the registers in the range to the register list. Count += EndReg - Reg; @@ -2575,6 +2911,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // The list is of D registers, but we also allow Q regs and just interpret // them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + if (!Spacing) + Spacing = 1; // Register range implies a single-spaced list. + else if (Spacing == 2) { + Error(RegLoc, + "invalid register in double-spaced list (must be 'D' register)"); + return MatchOperand_ParseFail; + } Reg = getDRegFromQReg(Reg); if (Reg != OldReg + 1) { Error(RegLoc, "non-contiguous register range"); @@ -2582,14 +2925,45 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } ++Reg; Count += 2; + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } continue; } - // Normal D register. Just check that it's contiguous and keep going. - if (Reg != OldReg + 1) { + // Normal D register. + // Figure out the register spacing (single or double) of the list if + // we don't know it already. + if (!Spacing) + Spacing = 1 + (Reg == OldReg + 2); + + // Just check that it's contiguous and keep going. + if (Reg != OldReg + Spacing) { Error(RegLoc, "non-contiguous register range"); return MatchOperand_ParseFail; } ++Count; + // Parse the lane specifier if present.
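+ // (Each entry in the list must use the same lane syntax as the first
+ // register, e.g. all of {d0[1], d1[1]}; the check below enforces that.)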
+ VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + if (Spacing == 2 && LaneKind != NoLanes) { + Error(EndLoc, + "lane index specifier invalid in double-spaced register list"); + return MatchOperand_ParseFail; + } } SMLoc E = Parser.getTok().getLoc(); @@ -2599,7 +2973,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } Parser.Lex(); // Eat '}' token. - Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, S, E)); + switch (LaneKind) { + default: + assert(0 && "unexpected lane kind in register list."); + case NoLanes: + Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, + (Spacing == 2), S, E)); + break; + case AllLanes: + Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, + S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count, + LaneIndex, S, E)); + break; + } return MatchOperand_Success; } @@ -2786,7 +3175,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, Parser.Lex(); // Eat shift type token. // There must be a '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2864,7 +3254,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2924,7 +3315,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a rotate amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2961,7 +3353,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2993,7 +3386,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -3087,7 +3481,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Tok.getLoc(); // Do immediates first, as we always parse those if we have a '#'.
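+ // (For gas compatibility, '$' is accepted as an alternative immediate
+ // marker here as well.)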
- if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat the '#'. // Explicitly look for a '-', as we need to encode negative zero // differently. @@ -3444,7 +3839,7 @@ bool ARMAsmParser:: cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); // Vn @@ -3458,7 +3853,7 @@ bool ARMAsmParser:: cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Vd - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // Create a writeback register dummy placeholder. Inst.addOperand(MCOperand::CreateImm(0)); // Vn @@ -3478,7 +3873,7 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, // Vn ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); // Vt - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; @@ -3494,7 +3889,7 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, // Vm ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); // Vt - ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); // pred ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); return true; @@ -3591,8 +3986,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // offset. Be friendly and also accept a plain integer (without a leading // hash) for gas compatibility. if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar) || Parser.getTok().is(AsmToken::Integer)) { - if (Parser.getTok().is(AsmToken::Hash)) + if (Parser.getTok().isNot(AsmToken::Integer)) Parser.Lex(); // Eat the '#'. E = Parser.getTok().getLoc(); @@ -3690,7 +4086,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, if (Tok.isNot(AsmToken::Identifier)) return true; StringRef ShiftName = Tok.getString(); - if (ShiftName == "lsl" || ShiftName == "LSL") + if (ShiftName == "lsl" || ShiftName == "LSL" || + ShiftName == "asl" || ShiftName == "ASL") St = ARM_AM::lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") St = ARM_AM::lsr; @@ -3710,7 +4107,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, Loc = Parser.getTok().getLoc(); // A '#' and a shift amount. const AsmToken &HashTok = Parser.getTok(); - if (HashTok.isNot(AsmToken::Hash)) + if (HashTok.isNot(AsmToken::Hash) && + HashTok.isNot(AsmToken::Dollar)) return Error(HashTok.getLoc(), "'#' expected"); Parser.Lex(); // Eat hash token. @@ -3739,7 +4137,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Hash)) + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; // Disambiguate the VMOV forms that can accept an FP immediate. 
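+ // (e.g. "vmov.f32 s0, #1.0" takes a genuine FP immediate here, while
+ // integer-immediate forms fall through to the generic operand parsing.)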
@@ -3852,6 +4251,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return parseMemory(Operands); case AsmToken::LCurly: return parseRegisterList(Operands); + case AsmToken::Dollar: case AsmToken::Hash: { // #42 -> immediate. // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate @@ -3990,7 +4390,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || - Mnemonic == "vrsqrts" || Mnemonic == "srs" || + Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || + Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || + Mnemonic == "fsts" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4206,9 +4608,27 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } +static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Apply mnemonic aliases before doing anything else, as the destination + // mnemonic may include suffixes and we want to handle them normally. + // The generic tblgen'erated code does this later, at the start of + // MatchInstructionImpl(), but that's too late for aliases that include + // any sort of suffix. + unsigned AvailableFeatures = getAvailableFeatures(); + applyMnemonicAliases(Name, AvailableFeatures); + + // First check for the ARM-specific .req directive. + if (Parser.getTok().is(AsmToken::Identifier) && + Parser.getTok().getIdentifier() == ".req") { + parseDirectiveReq(Name, NameLoc); + // We always return 'error' for this, as we're done with this + // statement and don't need to match the instruction. + return true; + } + // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); StringRef Mnemonic = Name.slice(Start, Next); @@ -4400,12 +4820,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, } } // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the - // end. Convert it to a token here. + // end. Convert it to a token here. Take care not to convert those + // that should hit the Thumb2 encoding. if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[5])->isImm()) { ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { + if (CE && CE->getValue() == 0 && + (isThumbOne() || + // The cc_out operand matches the IT block. + ((inITBlock() != CarrySetting) && + // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && + isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){ Operands.erase(Operands.begin() + 5); Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); delete Op; @@ -4605,11 +5034,495 @@ validateInstruction(MCInst &Inst, return false; } +static unsigned getRealVSTLNOpcode(unsigned Opc) { + switch(Opc) { + default: assert(0 && "unexpected opcode!"); + // VST1LN + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: + case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: + case ARM::VST1LNdWB_fixed_Asm_U8: + return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16: + case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16: + case ARM::VST1LNdWB_fixed_Asm_U16: + return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: + case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: + case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: + return ARM::VST1LNd32_UPD; + case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: + case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: + case ARM::VST1LNdWB_register_Asm_U8: + return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16: + case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16: + case ARM::VST1LNdWB_register_Asm_U16: + return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: + case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: + case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: + return ARM::VST1LNd32_UPD; + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: + case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8: + case ARM::VST1LNdAsm_U8: + return ARM::VST1LNd8; + case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16: + case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: + case ARM::VST1LNdAsm_U16: + return ARM::VST1LNd16; + case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: + case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: + case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32: + return ARM::VST1LNd32; + + // VST2LN + case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: + case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: + case ARM::VST2LNdWB_fixed_Asm_U8: + return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16: + case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16: + case ARM::VST2LNdWB_fixed_Asm_U16: + return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: + case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: + case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: + return ARM::VST2LNd32_UPD; + case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: + case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: + case ARM::VST2LNdWB_register_Asm_U8: + return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16: + case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16: + case ARM::VST2LNdWB_register_Asm_U16: + return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: + case ARM::VST2LNdWB_register_Asm_F32: case 
ARM::VST2LNdWB_register_Asm_I32: + case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: + return ARM::VST2LNd32_UPD; + case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: + case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8: + case ARM::VST2LNdAsm_U8: + return ARM::VST2LNd8; + case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16: + case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: + case ARM::VST2LNdAsm_U16: + return ARM::VST2LNd16; + case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: + case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: + case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32: + return ARM::VST2LNd32; + } +} + +static unsigned getRealVLDLNOpcode(unsigned Opc) { + switch(Opc) { + default: assert(0 && "unexpected opcode!"); + // VLD1LN + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: + case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: + case ARM::VLD1LNdWB_fixed_Asm_U8: + return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16: + case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16: + case ARM::VLD1LNdWB_fixed_Asm_U16: + return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: + case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: + case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: + return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: + case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: + case ARM::VLD1LNdWB_register_Asm_U8: + return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16: + case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16: + case ARM::VLD1LNdWB_register_Asm_U16: + return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: + case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: + case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: + return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: + case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8: + case ARM::VLD1LNdAsm_U8: + return ARM::VLD1LNd8; + case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16: + case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: + case ARM::VLD1LNdAsm_U16: + return ARM::VLD1LNd16; + case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: + case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: + case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32: + return ARM::VLD1LNd32; + + // VLD2LN + case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: + case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: + case ARM::VLD2LNdWB_fixed_Asm_U8: + return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16: + case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16: + case ARM::VLD2LNdWB_fixed_Asm_U16: + return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: + case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: + case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: + return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: + case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: + case ARM::VLD2LNdWB_register_Asm_U8: + return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_register_Asm_16: 
case ARM::VLD2LNdWB_register_Asm_P16: + case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16: + case ARM::VLD2LNdWB_register_Asm_U16: + return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: + case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: + case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: + return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: + case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8: + case ARM::VLD2LNdAsm_U8: + return ARM::VLD2LNd8; + case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16: + case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: + case ARM::VLD2LNdAsm_U16: + return ARM::VLD2LNd16; + case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: + case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: + case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32: + return ARM::VLD2LNd32; + } +} + bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { - // Handle the MOV complex aliases. + // Handle NEON VST complex aliases. + case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8: + case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8: + case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16: + case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16: + case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16: + case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F: + case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32: + case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8: + case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8: + case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16: + case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16: + case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16: + case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F: + case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32: + case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
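+ // (The parsed Asm pseudo carries (Vd, lane, Rn, alignment, Rm, pred);
+ // the real VST1LN writeback instruction wants (Rn_wb, Rn, align, Rm,
+ // Vd, lane, pred), hence the reshuffle below.)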
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8: + case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8: + case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16: + case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16: + case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16: + case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F: + case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32: + case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8: + case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8: + case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16: + case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16: + case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16: + case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F: + case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32: + case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8: + case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16: + case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16: + case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F: + case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32: + case ARM::VST1LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8: + case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16: + case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16: + case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F: + case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32: + case ARM::VST2LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle NEON VLD complex aliases. + case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8: + case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8: + case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16: + case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16: + case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16: + case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F: + case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32: + case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8: + case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8: + case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16: + case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16: + case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16: + case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F: + case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32: + case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8: + case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8: + case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16: + case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16: + case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16: + case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F: + case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32: + case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8: + case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8: + case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16: + case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16: + case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16: + case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F: + case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32: + case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8: + case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16: + case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16: + case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F: + case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32: + case ARM::VLD1LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8: + case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16: + case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16: + case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F: + case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32: + case ARM::VLD2LNdAsm_U32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode())); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + // Handle the Thumb2 mode MOV complex aliases. + case ARM::t2MOVsi: + case ARM::t2MOVSsi: { + // Which instruction to expand to depends on the CCOut operand and + // whether we're in an IT block if the register operands are low + // registers. + bool isNarrow = false; + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg()) && + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi)) + isNarrow = true; + MCInst TmpInst; + unsigned newOpc; + switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) { + default: llvm_unreachable("unexpected opcode!"); + case ARM_AM::asr: newOpc = isNarrow ? 
ARM::tASRri : ARM::t2ASRri; break; + case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break; + case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break; + case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break; + } + unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()); + if (Amount == 32) Amount = 0; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + if (isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(Amount)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + if (!isNarrow) + TmpInst.addOperand(MCOperand::CreateReg( + Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0)); + Inst = TmpInst; + return true; + } + // Handle the ARM mode MOV complex aliases. case ARM::ASRr: case ARM::LSRr: case ARM::LSLr: @@ -4743,6 +5656,24 @@ processInstruction(MCInst &Inst, Inst = TmpInst; } break; + case ARM::t2ADDri12: + // If the immediate fits for encoding T3 (t2ADDri) and the generic "add" + // mnemonic was used (not "addw"), encoding T3 is preferred. + if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" || + ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) + break; + Inst.setOpcode(ARM::t2ADDri); + Inst.addOperand(MCOperand::CreateReg(0)); // cc_out + break; + case ARM::t2SUBri12: + // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub" + // mnemonic was used (not "subw"), encoding T3 is preferred. + if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" || + ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) + break; + Inst.setOpcode(ARM::t2SUBri); + Inst.addOperand(MCOperand::CreateReg(0)); // cc_out + break; case ARM::tADDi8: // If the immediate is in the range 0-7, we want tADDi3 iff Rd was // explicitly specified. From the ARM ARM: "Encoding T1 is preferred @@ -4763,6 +5694,26 @@ processInstruction(MCInst &Inst, return true; } break; + case ARM::t2ADDrr: { + // If the destination and first source operand are the same, and + // there's no setting of the flags, use encoding T2 instead of T3. + // Note that this is only for ADD, not SUB. This mirrors the system + // 'as' behaviour. Make sure the wide encoding wasn't explicit. + if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + Inst.getOperand(5).getReg() != 0 || + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + break; + MCInst TmpInst; + TmpInst.setOpcode(ARM::tADDhirr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::tB: // A Thumb conditional branch outside of an IT block is a tBcc.
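// (Inside an IT block the branch stays a plain tB; its condition comes
// from the enclosing IT instruction rather than the branch encoding.)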
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) { @@ -5079,12 +6030,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveWord(4, DirectiveID.getLoc()); else if (IDVal == ".thumb") return parseDirectiveThumb(DirectiveID.getLoc()); + else if (IDVal == ".arm") + return parseDirectiveARM(DirectiveID.getLoc()); else if (IDVal == ".thumb_func") return parseDirectiveThumbFunc(DirectiveID.getLoc()); else if (IDVal == ".code") return parseDirectiveCode(DirectiveID.getLoc()); else if (IDVal == ".syntax") return parseDirectiveSyntax(DirectiveID.getLoc()); + else if (IDVal == ".unreq") + return parseDirectiveUnreq(DirectiveID.getLoc()); return true; } @@ -5120,9 +6075,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { return Error(L, "unexpected token in directive"); Parser.Lex(); - // TODO: set thumb mode - // TODO: tell the MC streamer the mode - // getParser().getStreamer().Emit???(); + if (!isThumb()) + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16); + return false; +} + +/// parseDirectiveARM +/// ::= .arm +bool ARMAsmParser::parseDirectiveARM(SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + + if (isThumb()) + SwitchMode(); + getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32); return false; } @@ -5212,6 +6180,45 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { return false; } +/// parseDirectiveReq +/// ::= name .req registername +bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { + Parser.Lex(); // Eat the '.req' token. + unsigned Reg; + SMLoc SRegLoc, ERegLoc; + if (ParseRegister(Reg, SRegLoc, ERegLoc)) { + Parser.EatToEndOfStatement(); + return Error(SRegLoc, "register name expected"); + } + + // Shouldn't be anything else. + if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { + Parser.EatToEndOfStatement(); + return Error(Parser.getTok().getLoc(), + "unexpected input in .req directive."); + } + + Parser.Lex(); // Consume the EndOfStatement + + if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) + return Error(SRegLoc, "redefinition of '" + Name + + "' does not match original."); + + return false; +} + +/// parseDirectiveUnreq +/// ::= .unreq registername +bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Parser.EatToEndOfStatement(); + return Error(L, "unexpected input in .unreq directive."); + } + RegisterReqs.erase(Parser.getTok().getIdentifier()); + Parser.Lex(); // Eat the identifier. + return false; +} + extern "C" void LLVMInitializeARMAsmLexer(); /// Force static initialization.
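The .req/.unreq handling above mirrors gas. A minimal usage sketch (the alias name is illustrative, not taken from this patch's tests):

    acc .req r4          @ 'acc' now parses as r4
    add acc, acc, #1
    .unreq acc           @ the alias is gone again

Redefining an existing alias to a different register is rejected ("redefinition of ... does not match original"), while restating it with the same register is a harmless no-op; the GetOrCreateValue check above implements exactly that.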
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index 3f5ad39..e24a1b1 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser ) add_dependencies(LLVMARMAsmParser ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMAsmParser - LLVMARMDesc - LLVMARMInfo - LLVMMC - LLVMMCParser - LLVMSupport - ) diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt index cbf9b4b..f0184b6 100644 --- a/lib/Target/ARM/AsmParser/LLVMBuild.txt +++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMAsmParser parent = ARM required_libraries = ARMDesc ARMInfo MC MCParser Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 511932e..04cdf55 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -48,20 +48,6 @@ add_llvm_target(ARMCodeGen Thumb2SizeReduction.cpp ) -add_llvm_library_dependencies(LLVMARMCodeGen - LLVMARMAsmPrinter - LLVMARMDesc - LLVMARMInfo - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - # workaround for hanging compilation on MSVC9, 10 if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 ) set_property( diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ad250ab..49c64fd 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD1d32Qwb_register: case ARM::VLD1d64Qwb_fixed: case ARM::VLD1d64Qwb_register: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: - case ARM::VLD2b8_UPD: - case ARM::VLD2b16_UPD: - case ARM::VLD2b32_UPD: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2b8wb_register: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_register: case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2196,23 +2205,40 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST1q16wb_register: case ARM::VST1q32wb_register: case ARM::VST1q64wb_register: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Twb_register: + case ARM::VST1d16Twb_register: + case ARM::VST1d32Twb_register: + case ARM::VST1d64Twb_register: + case ARM::VST1d8Qwb_fixed: + case 
ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST1d8Qwb_register: + case ARM::VST1d16Qwb_register: + case ARM::VST1d32Qwb_register: + case ARM::VST1d64Qwb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2d8wb_register: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_register: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2q8wb_register: + case ARM::VST2q16wb_register: + case ARM::VST2q32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: + case ARM::VST2b8wb_register: + case ARM::VST2b16wb_register: + case ARM::VST2b32wb_register: case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: @@ -2264,34 +2290,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Second input register switch (Inst.getOpcode()) { - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2307,12 +2305,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) return MCDisassembler::Fail; break; - case ARM::VST2b8: - case ARM::VST2b16: - case ARM::VST2b32: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: case ARM::VST3q8: case ARM::VST3q16: case ARM::VST3q32: @@ -2334,28 +2326,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Third input register switch (Inst.getOpcode()) { - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2392,20 +2362,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Fourth input register switch (Inst.getOpcode()) { - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST4d8: case ARM::VST4d16: case ARM::VST4d32: @@ -2441,16 +2397,11 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); 
unsigned size = fieldFromInstruction32(Insn, 6, 2); - unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1; align *= (1 << size); if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; - if (regs == 2) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) - return MCDisassembler::Fail; - } if (Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -2460,12 +2411,12 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) - return MCDisassembler::Fail; - } + // The fixed offset post-increment encodes Rm == 0xd. The no-writeback + // variant encodes Rm == 0xf. Anything else is a register offset post- + // increment and we need to add the register operand to the instruction. + if (Rm != 0xD && Rm != 0xF && + !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; return S; } @@ -2693,7 +2644,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; unsigned op = fieldFromInstruction32(Insn, 6, 1); - unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1; if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -2702,10 +2652,8 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // Writeback } - for (unsigned i = 0; i < length; ++i) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder))) + if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; - } if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; @@ -4138,4 +4086,3 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, return S; } - diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt index da87751..9de6e5c 100644 --- a/lib/Target/ARM/Disassembler/CMakeLists.txt +++ b/lib/Target/ARM/Disassembler/CMakeLists.txt @@ -11,11 +11,3 @@ set_property( ) endif() add_dependencies(LLVMARMDisassembler ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMDisassembler - LLVMARMCodeGen - LLVMARMDesc - LLVMARMInfo - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index baa9bc3..94075a9 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMDisassembler parent = ARM required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 6c6c021..662097a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1029,3 +1029,29 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}"; } + +void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream 
&O) { + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; +} + diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3f38f1a..05db2d2 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -133,6 +133,12 @@ public: void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt index fa0b495..e2d4819 100644 --- a/lib/Target/ARM/InstPrinter/CMakeLists.txt +++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt @@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter ) add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMAsmPrinter - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt index b34aab4..6f4fa36 100644 --- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt +++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMAsmPrinter parent = ARM required_libraries = MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index 9082539..fd4b3a3 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = ARM @@ -30,4 +33,3 @@ name = ARMCodeGen parent = ARM required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target add_to_library_groups = ARM - diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 62d04c4..bf1f0e8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -102,6 +102,11 @@ public: bool MayNeedRelaxation(const MCInst &Inst) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) 
const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; @@ -124,14 +129,49 @@ public: }; } // end anonymous namespace +static unsigned getRelaxedOpcode(unsigned Op) { + switch (Op) { + default: return Op; + case ARM::tBcc: return ARM::t2Bcc; + } +} + bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { - // FIXME: Thumb targets, different move constant targets.. + if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()) + return true; return false; } +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the value is too big for a (signed) i8. + int64_t Offset = int64_t(Value) - 4; + return Offset > 254 || Offset < -256; +} + void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented"); - return; + unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); + + // Sanity check w/ diagnostic if we get here w/ a bogus instruction. + if (RelaxedOp == Inst.getOpcode()) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + Inst.dump_pretty(OS); + OS << "\n"; + report_fatal_error("unexpected instruction to relax: " + OS.str()); + } + + // The instructions we're relaxing have (so far) the same operands. + // We just need to update to the proper opcode. + Res = Inst; + Res.setOpcode(RelaxedOp); } bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 865c3e2..c38a882 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -1412,7 +1412,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return MO.getReg(); + return getARMRegisterNumbering(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 352c73e..f394b4f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" @@ -178,9 +179,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: MovtBit = 1; + // The thumb bit shouldn't be set in the 'other-half' bit of the + // relocation, but it will be set in FixedValue if the base symbol + // is a thumb function. Clear it out here. 
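+ // (Bit 0 of a Thumb function's address is the interworking mode flag,
+ // not part of the real address, so it must not leak into the halves of
+ // the movw/movt immediate.)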
+ if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; break; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: + if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; MovtBit = 1; // Fallthrough case ARM::fixup_t2_movw_lo16: @@ -189,7 +197,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, break; } - if (Type == macho::RIT_ARM_HalfDifference) { uint32_t OtherHalf = MovtBit ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index f529314..f2cf78a 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -10,10 +10,3 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen) # Hack: we need to include 'main' target directory to grab private headers include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) - -add_llvm_library_dependencies(LLVMARMDesc - LLVMARMAsmPrinter - LLVMARMInfo - LLVMMC - LLVMSupport - ) diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt index 46b11c7..2a7fe61 100644 --- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = ARMDesc parent = ARM required_libraries = ARMAsmPrinter ARMInfo MC Support add_to_library_groups = ARM - diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 2df0053..000a37f 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, const MCInstrDesc &MCID2 = TII->get(AddSubOpc); unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg) + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) .addReg(Src2Reg, getKillRegState(Src2Kill)); if (HasLane) MIB.addImm(LaneImm); MIB.addImm(Pred).addReg(PredReg); - MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2) + MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); if (NegAcc) { @@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isBarrier()) { + if (MI->isBarrier()) { clearStack(); Skip = 0; ++MII; diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt index 8b38b13..533e747 100644 --- a/lib/Target/ARM/TargetInfo/CMakeLists.txt +++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt @@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo ) add_dependencies(LLVMARMInfo ARMCommonTableGen) - -add_llvm_library_dependencies(LLVMARMInfo - LLVMMC - LLVMSupport - LLVMTarget - ) diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt index 046c1fc..a07a940 100644 --- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt +++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt @@ -21,4 
+21,3 @@ name = ARMInfo parent = ARM required_libraries = MC Support Target add_to_library_groups = ARM - diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index e8ed482..e61c0a7 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -643,14 +643,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(Offset && "This code isn't needed if offset already handled!"); unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = MI.getDesc(); // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) removeOperands(MI, PIdx); - if (Desc.mayLoad()) { + if (MI.mayLoad()) { // Use the destination register to materialize sp + offset. unsigned TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; @@ -673,7 +672,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); - } else if (Desc.mayStore()) { + } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); bool UseRR = false; @@ -699,7 +698,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Add predicate back if it's needed. - if (MI.getDesc().isPredicable()) { + if (MI.isPredicable()) { MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index b627400..55b4d30 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -13,6 +13,7 @@ #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, // rsb r2, 0 // const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.hasOptionalDef() && + if (MI->hasOptionalDef() && MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR) return false; @@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && - (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) { + (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) { if (MBBI->isDebugValue()) continue; @@ -237,6 +238,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Last instruction in IT block kills ITSTATE. LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); + // Finalize the bundle. + FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI); + Modified = true; ++NumITs; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index e5fc8b4..e206288 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -452,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit load / store instruction. 
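One theme runs through these hunks: queries migrate from the static opcode descriptor (MCID.mayStore(), MCID.isBranch(), MCID.hasOptionalDef()) to the MachineInstr itself. Once FinalizeBundle above wraps an IT block into a bundle, only the MachineInstr-level accessors can answer for the bundle as a whole; an MCInstrDesc always describes a single opcode. A hedged illustration of the idiom, assuming the bundle-aware accessors introduced alongside this change:

  // Prefer MI-level predicates; they can see through instruction bundles.
  static bool touchesMemory(const MachineInstr *MI) {
    return MI->mayLoad() || MI->mayStore();
  }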
DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); @@ -478,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumLdSts; return true; } @@ -513,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) @@ -525,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -533,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayLoad() || MCID.mayStore()) + if (MI->mayLoad() || MI->mayStore()) return ReduceLoadStore(MBB, MI, Entry); switch (Opc) { @@ -654,7 +653,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -678,7 +677,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++Num2Addrs; return true; } @@ -745,7 +744,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -785,7 +784,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -830,16 +829,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineInstr *CPSRDef = 0; + MachineInstr *BundleMI = 0; // If this BB loops back to itself, conservatively avoid narrowing the // first instruction that does partial flag update. 
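The ReduceMBB changes just below swap MBB.begin()/end() for instr_begin()/instr_end(), and MBB.erase(MI) for MBB.erase_instr(MI). The distinction matters with bundles: the plain iterators treat a BUNDLE as one opaque unit, while the instr_ variants also visit the instructions packed inside it, which a pass that narrows individual Thumb2 instructions must do. The loop shape, sketched under that assumption:

  // Walk every instruction in the block, including those inside bundles.
  for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
                                         E = MBB.instr_end();
       I != E; ++I) {
    // ... inspect or narrow *I in place ...
  }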
bool IsSelfLoop = MBB.isSuccessor(&MBB); - MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MachineBasicBlock::iterator NextMII; + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); + MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); MachineInstr *MI = &*MII; + if (MI->isBundle()) { + BundleMI = MI; + continue; + } + LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); unsigned Opcode = MI->getOpcode(); @@ -850,7 +855,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.Special) { if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } goto ProcessNext; @@ -860,7 +865,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; goto ProcessNext; } @@ -869,15 +874,24 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } } ProcessNext: + if (LiveCPSR && + NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() && + BundleMI->killsRegister(ARM::CPSR)) + // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill + // marker is only on the BUNDLE instruction. Process the BUNDLE + // instruction as we finish with the bundled instruction to work around + // the inconsistency. + LiveCPSR = false; + bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); - if (MI->getDesc().isCall()) { + if (MI->isCall()) { // Calls don't really set CPSR. 
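The FIXME above works around kill flags living on the BUNDLE header rather than on the bundled instructions themselves: only once the last member of a bundle has been processed does the pass consult the header to decide whether CPSR dies. Restated as a standalone condition (a sketch of the existing test, not new behavior; the NextMII != E guard is checked by the surrounding code):

  bool LeavesCPSRKillingBundle = MI->isInsideBundle() &&
                                 !NextMII->isInsideBundle() &&
                                 BundleMI->killsRegister(ARM::CPSR);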
CPSRDef = 0; IsSelfLoop = false; diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt index edf8ee7..fa819a4 100644 --- a/lib/Target/CBackend/CMakeLists.txt +++ b/lib/Target/CBackend/CMakeLists.txt @@ -2,16 +2,4 @@ add_llvm_target(CBackendCodeGen CBackend.cpp ) -add_llvm_library_dependencies(LLVMCBackendCodeGen - LLVMAnalysis - LLVMCBackendInfo - LLVMCodeGen - LLVMCore - LLVMMC - LLVMScalarOpts - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) - add_subdirectory(TargetInfo) diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index ca346af..8b2286e 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -21,10 +21,10 @@ namespace llvm { struct CTargetMachine : public TargetMachine { CTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS) {} + : TargetMachine(T, TT, CPU, FS, Options) { } virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt index 851ded9..e64feb0 100644 --- a/lib/Target/CBackend/LLVMBuild.txt +++ b/lib/Target/CBackend/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = TargetInfo + [component_0] type = TargetGroup name = CBackend @@ -26,4 +29,3 @@ name = CBackendCodeGen parent = CBackend required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils add_to_library_groups = CBackend - diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt index 8e616be..6203616 100644 --- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt +++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt @@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMCBackendInfo CBackendTargetInfo.cpp ) - -add_llvm_library_dependencies(LLVMCBackendInfo - LLVMMC - LLVMSupport - LLVMTarget - ) diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt index 35752b7..1b47d8e 100644 --- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CBackendInfo parent = CBackend required_libraries = MC Support Target add_to_library_groups = CBackend - diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 60e2189..22d8c76 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -3,7 +3,6 @@ add_llvm_library(LLVMTarget Target.cpp TargetData.cpp TargetELFWriterInfo.cpp - TargetFrameLowering.cpp TargetInstrInfo.cpp TargetIntrinsicInfo.cpp TargetLibraryInfo.cpp @@ -13,12 +12,6 @@ add_llvm_library(LLVMTarget TargetSubtargetInfo.cpp ) -add_llvm_library_dependencies(LLVMTarget - LLVMCore - LLVMMC - LLVMSupport - ) - foreach(t ${LLVM_TARGETS_TO_BUILD}) message(STATUS "Targeting ${t}") add_subdirectory(${t}) diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index b442a5c..6c67c2d 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -23,17 +23,5 @@ add_llvm_target(CellSPUCodeGen SPUNopFiller.cpp ) -add_llvm_library_dependencies(LLVMCellSPUCodeGen - LLVMAsmPrinter - LLVMCellSPUDesc - LLVMCellSPUInfo - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt index 4ae26b2..277620b 100644 --- a/lib/Target/CellSPU/LLVMBuild.txt +++ b/lib/Target/CellSPU/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = CellSPU @@ -27,4 +30,3 @@ name = CellSPUCodeGen parent = CellSPU required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target add_to_library_groups = CellSPU - diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt index d41fe93..0027bdb 100644 --- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt @@ -3,9 +3,4 @@ add_llvm_library(LLVMCellSPUDesc SPUMCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMCellSPUDesc - LLVMCellSPUInfo - LLVMMC - ) - add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen) diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt index abc44a2..71e5bbc 100644 --- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CellSPUDesc parent = CellSPU required_libraries = CellSPUInfo MC add_to_library_groups = CellSPU - diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp index 093f99f..916f9ba 100644 --- a/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -47,7 +47,8 @@ bool SPUFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->getStackSize() && - (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()); + (MF.getTarget().Options.DisableFramePointerElim(MF) || + 
MFI->hasVarSizedObjects()); } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index d58e49b..dc0d5a6 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -296,12 +296,22 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i64, Expand); setOperationAction(ISD::CTTZ , MVT::i128, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand); setOperationAction(ISD::CTLZ , MVT::i8, Promote); setOperationAction(ISD::CTLZ , MVT::i16, Promote); setOperationAction(ISD::CTLZ , MVT::i32, Legal); setOperationAction(ISD::CTLZ , MVT::i64, Expand); setOperationAction(ISD::CTLZ , MVT::i128, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand); // SPU has a version of select that implements (a&~c)|(b&c), just like // select ought to work: diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 6940316..1e922a4 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -34,9 +34,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 909f12e..0841fee 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine { InstrItineraryData InstrItins; public: SPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt index 3f2d6b09..6a98f95 100644 --- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt +++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMCellSPUInfo CellSPUTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMCellSPUInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen) diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt index 0710cc3..6937e70 100644 --- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CellSPUInfo parent = CellSPU required_libraries = MC Support Target add_to_library_groups = CellSPU - diff --git a/lib/Target/CppBackend/CMakeLists.txt 
b/lib/Target/CppBackend/CMakeLists.txt index 53f6868..515e1dd 100644 --- a/lib/Target/CppBackend/CMakeLists.txt +++ b/lib/Target/CppBackend/CMakeLists.txt @@ -2,11 +2,4 @@ add_llvm_target(CppBackendCodeGen CPPBackend.cpp ) -add_llvm_library_dependencies(LLVMCppBackendCodeGen - LLVMCore - LLVMCppBackendInfo - LLVMSupport - LLVMTarget - ) - add_subdirectory(TargetInfo) diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index a3613b4..92bca6c 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -23,10 +23,10 @@ class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { CPPTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS) {} + : TargetMachine(T, TT, CPU, FS, Options) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt index 77e31c7..122b5e7 100644 --- a/lib/Target/CppBackend/LLVMBuild.txt +++ b/lib/Target/CppBackend/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = TargetInfo + [component_0] type = TargetGroup name = CppBackend @@ -26,4 +29,3 @@ name = CppBackendCodeGen parent = CppBackend required_libraries = Core CppBackendInfo Support Target add_to_library_groups = CppBackend - diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt index 738b215..f82d72e 100644 --- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt +++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt @@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMCppBackendInfo CppBackendTargetInfo.cpp ) - -add_llvm_library_dependencies(LLVMCppBackendInfo - LLVMMC - LLVMSupport - LLVMTarget - ) diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt index 67a23ba..d4dfc3e 100644 --- a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = CppBackendInfo parent = CppBackend required_libraries = MC Support Target add_to_library_groups = CppBackend - diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt new file mode 100644 index 0000000..f8705ee --- /dev/null +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -0,0 +1,35 @@ +set(LLVM_TARGET_DEFINITIONS Hexagon.td) + +tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv) +tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM HexagonGenIntrinsics.inc -gen-tgt-intrinsic) +add_public_tablegen_target(HexagonCommonTableGen) + +add_llvm_target(HexagonCodeGen + HexagonAsmPrinter.cpp + HexagonCallingConvLower.cpp + HexagonCFGOptimizer.cpp + HexagonExpandPredSpillCode.cpp + HexagonFrameLowering.cpp + HexagonHardwareLoops.cpp + HexagonInstrInfo.cpp + HexagonISelDAGToDAG.cpp + HexagonISelLowering.cpp + HexagonMCAsmInfo.cpp + HexagonOptimizeSZExtends.cpp + HexagonRegisterInfo.cpp + HexagonRemoveSZExtArgs.cpp + HexagonSelectionDAGInfo.cpp + HexagonSplitTFRCondSets.cpp + HexagonSubtarget.cpp + HexagonTargetMachine.cpp + HexagonTargetObjectFile.cpp + ) + +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) + diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h new file mode 100644 index 0000000..a5f2279 --- /dev/null +++ b/lib/Target/Hexagon/Hexagon.h @@ -0,0 +1,54 @@ +//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Hexagon back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_Hexagon_H +#define TARGET_Hexagon_H + +#include <cassert> +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + class FunctionPass; + class TargetMachine; + class HexagonTargetMachine; + class raw_ostream; + + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM); + FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM); + FunctionPass *createHexagonFPMoverPass(TargetMachine &TM); + FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); + FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM); + + FunctionPass* createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); + FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); + + FunctionPass *createHexagonHardwareLoops(); + FunctionPass *createHexagonOptimizeSZExtends(); + FunctionPass *createHexagonFixupHwLoops(); + +} // end namespace llvm; + +#define Hexagon_POINTER_SIZE 4 + +#define Hexagon_PointerSize (Hexagon_POINTER_SIZE) +#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8) +#define Hexagon_WordSize Hexagon_PointerSize +#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits + +// allocframe saves LR and FP on stack before allocating +// a new stack frame. This takes 8 bytes. +#define HEXAGON_LRFP_SIZE 8 + +#endif diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td new file mode 100644 index 0000000..72939e6 --- /dev/null +++ b/lib/Target/Hexagon/Hexagon.td @@ -0,0 +1,66 @@ +//===- Hexagon.td - Describe the Hexagon Target Machine ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Hexagon Subtarget features. +// + + +// Hexagon Architectures +def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2", + "Hexagon v2">; +def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3", + "Hexagon v3">; +def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4", + "Hexagon v4">; + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// +include "HexagonSchedule.td" +include "HexagonRegisterInfo.td" +include "HexagonCallingConv.td" +include "HexagonInstrInfo.td" +include "HexagonIntrinsics.td" +include "HexagonIntrinsicsDerived.td" + + +def HexagonInstrInfo : InstrInfo { + // Define how we want to lay out our target-specific information field. +} + +//===----------------------------------------------------------------------===// +// Hexagon processors supported. 
+//===----------------------------------------------------------------------===// + +class Proc<string Name, ProcessorItineraries Itin, + list<SubtargetFeature> Features> + : Processor<Name, Itin, Features>; + +def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>; +def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>; +def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>; + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def Hexagon : Target { + // Pull in Instruction Info: + let InstructionSet = HexagonInstrInfo; +} diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp new file mode 100644 index 0000000..8f8e804 --- /dev/null +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -0,0 +1,555 @@ +//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ----=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Hexagon assembly language. This printer is +// the output mechanism used by `llc'. +// +// Documentation at http://developer.apple.com/documentation/DeveloperTools/ +// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html +// +//===----------------------------------------------------------------------===// + + +#define DEBUG_TYPE "asm-printer" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<bool> AlignCalls( + "hexagon-align-calls", cl::Hidden, cl::init(true), + cl::desc("Insert falign after call instruction for Hexagon target")); + + +namespace { + class HexagonAsmPrinter : public AsmPrinter { + const HexagonSubtarget *Subtarget; + + public: + explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<HexagonSubtarget>(); + } + + virtual const char *getPassName() const { + return "Hexagon Assembly Printer"; + } + + /// printInstruction - This method is 
automatically generated by tablegen + /// from the instruction set description. It prints the given machine + /// instruction to the output stream. + void printInstruction(const MachineInstr *MI, raw_ostream &O); + virtual void EmitInstruction(const MachineInstr *MI); + + void printOp(const MachineOperand &MO, raw_ostream &O); + + /// printRegister - Print register according to target requirements. + /// + void printRegister(const MachineOperand &MO, bool R0AsZero, + raw_ostream &O) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); + O << getRegisterName(RegNo); + } + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + printRegister(MO, false, OS); + } else if (MO.isImm()) { + OS << MO.getImm(); + } else { + printOp(MO, OS); + } + } + + + bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + + + void printHexagonImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << value; + } + + + void printHexagonNegImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << -value; + } + + void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << " + #" + << (int) MO2.getImm(); + } + + + void printHexagonFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << ", #" + << MO2.getImm(); + } + + void printBranchOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight-byte displacement from the PC. 
+ if (MI->getOperand(OpNo).isImm()) { + O << "$+" << MI->getOperand(OpNo).getImm()*4; + } else { + printOp(MI->getOperand(OpNo), O); + } + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + + void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#HI("; + if (MI->getOperand(OpNo).isImm()) { + printHexagonImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#LO("; + if (MI->getOperand(OpNo).isImm()) { + printHexagonImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O); + + void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo, + raw_ostream &O); + + void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O); + void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O); + + void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const; + + static const char *getRegisterName(unsigned RegNo); + }; + +} // end of anonymous namespace + +// Include the auto-generated portion of the assembly writer. +#include "HexagonGenAsmWriter.inc" + + +void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, + const GlobalValue *GV) const { + + // For basic block level alignment, use falign. + if (!GV) { + OutStreamer.EmitRawText(StringRef("\t.falign")); + return; + } + + AsmPrinter::EmitAlignment(NumBits, GV); +} + +void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + dbgs() << "printOp() does not handle immediate values\n"; + abort(); + return; + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_JumpTableIndex: + O << *GetJTISymbol(MO.getIndex()); + // FIXME: PIC relocation model. + return; + case MachineOperand::MO_ConstantPoolIndex: + O << *GetCPISymbol(MO.getIndex()); + return; + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + return; + case MachineOperand::MO_GlobalAddress: { + // Computing the address of a global symbol, not calling it. + O << *Mang->getSymbol(MO.getGlobal()); + printOffset(MO.getOffset(), O); + return; + } + + default: + O << "<unknown operand type: " << MO.getType() << ">"; + return; + } +} + + +// +// isBlockOnlyReachableByFallthrough - We need to override this since the +// default AsmPrinter does not print labels for any basic block that +// is only reachable by a fall through. That works for all cases except +// for the case in which the basic block is reachable by a fall through but +// through an indirect branch from a jump table. In this case, the jump table +// will contain a label not defined by AsmPrinter. +// +bool HexagonAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + if (MBB->hasAddressTaken()) { + return false; + } + return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); +} + + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + // Does this asm operand have a single letter operand modifier? 
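The printSymbolHi/printSymbolLo routines above emit Hexagon's two halves of a 32-bit value; the assembler pairs #HI(x), the upper 16 bits, with #LO(x), the lower 16 bits, to materialize a full address or constant. A quick sanity sketch of the split, in plain C++ rather than LLVM API:

  #include <cstdint>

  static uint16_t hi16(uint32_t V) { return uint16_t(V >> 16); }    // #HI(V)
  static uint16_t lo16(uint32_t V) { return uint16_t(V & 0xFFFF); } // #LO(V)
  // For V = 0x00012344: #HI yields 0x0001 and #LO yields 0x2344.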
+ if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'c': // Don't print "$" before a global var name or constant. + // Hexagon never has a prefix. + printOperand(MI, OpNo, OS); + return false; + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + OS << "i"; + return false; + } + } + + printOperand(MI, OpNo, OS); + return false; +} + +bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + const MachineOperand &Base = MI->getOperand(OpNo); + const MachineOperand &Offset = MI->getOperand(OpNo+1); + + if (Base.isReg()) + printOperand(MI, OpNo, O); + else + assert(0 && "Unimplemented"); + + if (Offset.isImm()) { + if (Offset.getImm()) + O << " + #" << Offset.getImm(); + } + else + assert(0 && "Unimplemented"); + + return false; +} + +void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, + unsigned OpNo, + raw_ostream &O) { + assert(0 && "Unimplemented"); +} + + +/// EmitInstruction -- Print out a single Hexagon MI in Hexagon assembly syntax +/// to the current output stream. +/// +void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream O(Str); + + const MachineFunction* MF = MI->getParent()->getParent(); + const HexagonMachineFunctionInfo* MFI = + (const HexagonMachineFunctionInfo*) + MF->getInfo<HexagonMachineFunctionInfo>(); + + + + // Print a brace for the beginning of the packet. + if (MFI->isStartPacket(MI)) { + O << "\t{" << '\n'; + } + + DEBUG( O << "// MI = " << *MI << '\n';); + + // Indent + O << "\t"; + + + if (MI->getOpcode() == Hexagon::ENDLOOP0) { + if (MFI->isEndPacket(MI) && MFI->isStartPacket(MI)) { + O << "\t{ nop }"; + } else { + O << "}"; + } + printInstruction(MI, O); + } else if (MI->getOpcode() == Hexagon::STriwt) { + // + // Handle truncated store on Hexagon. 
+ // + O << "\tmemw("; + printHexagonMEMriOperand(MI, 0, O); + + O << ") = "; + unsigned SubRegNum = + TM.getRegisterInfo()->getSubReg(MI->getOperand(2) + .getReg(), Hexagon::subreg_loreg); + const char *SubRegName = getRegisterName(SubRegNum); + O << SubRegName << '\n'; + } else if (MI->getOpcode() == Hexagon::MPYI_rin) { + // Handle multiply by a negative constant on Hexagon: + // "$dst =- mpyi($src1, #$src2)" + printOperand(MI, 0, O); + O << " =- mpyi("; + printOperand(MI, 1, O); + O << ", #"; + printHexagonNegImmOperand(MI, 2, O); + O << ")"; + } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_indexed_MEM_V4) { + // + // Handle memw(Rs+u6:2) [+-]= #U5 + // + O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_MEM_V4) { + // + // Handle memw(Rs+u6:2) [+-]= #U5 + // + O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_indexed_MEM_V4) { + // + // Handle memh(Rs+u6:1) [+-]= #U5 + // + O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_MEM_V4) { + // + // Handle memh(Rs+u6:1) [+-]= #U5 + // + O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_indexed_MEM_V4) { + // + // Handle memb(Rs+u6:0) [+-]= #U5 + // + O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_MEM_V4) { + // + // Handle memb(Rs+u6:0) [+-]= #U5 + // + O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; + int addend = MI->getOperand(2).getImm(); + if (addend < 0) + O << "-= " << "#" << -addend << '\n'; + else + O << "+= " << "#" << addend << '\n'; + } else if (MI->getOpcode() == Hexagon::CMPbGTri_V4) { + // + // Handle Pd=cmpb.gt(Rs,#s8) + // + O << "\t"; + printRegister(MI->getOperand(0), false, O); + O << " = cmpb.gt("; + printRegister(MI->getOperand(1), false, O); + O << ", "; + int val = MI->getOperand(2).getImm() >> 24; + O << "#" << val << ")" << '\n'; + } else if (MI->getOpcode() == Hexagon::CMPhEQri_V4) { + // + // Handle Pd=cmph.eq(Rs,#s8) + // + O << "\t"; + printRegister(MI->getOperand(0), false, O); + O << " = cmph.eq("; + printRegister(MI->getOperand(1), false, O); + O << ", "; + int val = MI->getOperand(2).getImm(); + assert((((0 <= val) && (val <= 127)) || + ((65408 <= val) && (val <= 65535))) && + "Not in correct range!"); + if (val >= 65408) val -= 65536; + O << "#" << val << ")" << '\n'; + } else if (MI->getOpcode() == Hexagon::CMPhGTri_V4) { + // + // Handle Pd=cmph.gt(Rs,#s8) + // + O << "\t"; + printRegister(MI->getOperand(0), false, O); + O << " = cmph.gt("; + printRegister(MI->getOperand(1), false, O); + O << ", "; + int val = MI->getOperand(2).getImm() >> 
16; + O << "#" << val << ")" << '\n'; + } else { + printInstruction(MI, O); + } + + // Print a brace for the end of the packet. + if (MFI->isEndPacket(MI) && MI->getOpcode() != Hexagon::ENDLOOP0) { + O << "\n\t}" << '\n'; + } + + if (AlignCalls && MI->getDesc().isCall()) { + O << "\n\t.falign" << "\n"; + } + + OutStreamer.EmitRawText(O.str()); + return; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \n or \0. +// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { +// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); +// Name != E; ++Name) +// if (isprint(*Name)) +// OS << *Name; +// } + + +void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI, + int OpNo, raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << " + #" + << MO2.getImm(); +} + + +void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_GlobalAddress) && + "Expecting global address"); + + O << *Mang->getSymbol(MO.getGlobal()); + if (MO.getOffset() != 0) { + O << " + "; + O << MO.getOffset(); + } +} + +void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) && + "Expecting jump table index"); + + // Hexagon_TODO: Do we need name mangling? + O << *GetJTISymbol(MO.getIndex()); +} + +extern "C" void LLVMInitializeHexagonAsmPrinter() { + RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); +} diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp new file mode 100644 index 0000000..38000e7 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -0,0 +1,240 @@ +//===---- HexagonCFGOptimizer.cpp - CFG optimizations ---------------------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + + +#define DEBUG_TYPE "hexagon_cfg" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "HexagonTargetMachine.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" +#include <iostream> + +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +namespace { + +class HexagonCFGOptimizer : public MachineFunctionPass { + +private: + HexagonTargetMachine& QTM; + const HexagonSubtarget &QST; + + void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*); + + public: + static char ID; + HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID), + QTM(TM), + QST(*TM.getSubtargetImpl()) {} + + const char *getPassName() const { + return "Hexagon CFG Optimizer"; + } + bool runOnMachineFunction(MachineFunction &Fn); +}; + + +char HexagonCFGOptimizer::ID = 0; + +static bool IsConditionalBranch(int Opc) { + return (Opc == Hexagon::JMP_Pred) || (Opc == Hexagon::JMP_PredNot) + || (Opc == Hexagon::JMP_PredPt) || (Opc == Hexagon::JMP_PredNotPt); +} + + +static bool IsUnconditionalJump(int Opc) { + return (Opc == Hexagon::JMP); +} + + +void +HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) { + const HexagonInstrInfo *QII = QTM.getInstrInfo(); + int NewOpcode = 0; + switch(MI->getOpcode()) { + case Hexagon::JMP_Pred: + NewOpcode = Hexagon::JMP_PredNot; + break; + + case Hexagon::JMP_PredNot: + NewOpcode = Hexagon::JMP_Pred; + break; + + case Hexagon::JMP_PredPt: + NewOpcode = Hexagon::JMP_PredNotPt; + break; + + case Hexagon::JMP_PredNotPt: + NewOpcode = Hexagon::JMP_PredPt; + break; + + default: + assert(0 && "Cannot handle this case"); + } + + MI->setDesc(QII->get(NewOpcode)); + MI->getOperand(1).setMBB(NewTarget); +} + + +bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { + + // Loop over all of the basic blocks. + for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + + // Traverse the basic block. + MachineBasicBlock::iterator MII = MBB->getFirstTerminator(); + if (MII != MBB->end()) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (IsConditionalBranch(Opc)) { + + // + // (Case 1) Transform the code if the following condition occurs: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...next block in layout is BB3... + // BB3: ... + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // + // (Case 2) A variation occurs when BB3 contains a JMP to BB4: + // BB1: if (p0) jump BB3 + // ...falls-through to BB2 ... + // BB2: jump BB4 + // ...other basic blocks ... + // BB4: + // ...not a fall-thru + // BB3: ... + // jump BB4 + // + // Transform this to: + // BB1: if (!p0) jump BB4 + // Remove BB2 + // BB3: ... + // BB4: ... 
+ // + unsigned NumSuccs = MBB->succ_size(); + MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); + MachineBasicBlock* FirstSucc = *SI; + MachineBasicBlock* SecondSucc = *(++SI); + MachineBasicBlock* LayoutSucc = NULL; + MachineBasicBlock* JumpAroundTarget = NULL; + + if (MBB->isLayoutSuccessor(FirstSucc)) { + LayoutSucc = FirstSucc; + JumpAroundTarget = SecondSucc; + } else if (MBB->isLayoutSuccessor(SecondSucc)) { + LayoutSucc = SecondSucc; + JumpAroundTarget = FirstSucc; + } else { + // Odd case...cannot handle. + } + + // The target of the unconditional branch must be JumpAroundTarget. + // TODO: If not, we should not invert the unconditional branch. + MachineBasicBlock* CondBranchTarget = NULL; + if ((MI->getOpcode() == Hexagon::JMP_Pred) || + (MI->getOpcode() == Hexagon::JMP_PredNot)) { + CondBranchTarget = MI->getOperand(1).getMBB(); + } + + if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) { + continue; + } + + if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) { + + // Ensure that BB2 has one instruction -- an unconditional jump. + if ((LayoutSucc->size() == 1) && + IsUnconditionalJump(LayoutSucc->front().getOpcode())) { + MachineBasicBlock* UncondTarget = + LayoutSucc->front().getOperand(0).getMBB(); + // Check if the layout successor of BB2 is BB3. + bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget); + bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) && + JumpAroundTarget->size() >= 1 && + IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) && + JumpAroundTarget->pred_size() == 1 && + JumpAroundTarget->succ_size() == 1; + + if (case1 || case2) { + InvertAndChangeJumpTarget(MI, UncondTarget); + MBB->removeSuccessor(JumpAroundTarget); + MBB->addSuccessor(UncondTarget); + + // Remove the unconditional branch in LayoutSucc. + LayoutSucc->erase(LayoutSucc->begin()); + LayoutSucc->removeSuccessor(UncondTarget); + LayoutSucc->addSuccessor(JumpAroundTarget); + + // This code performs the conversion for case 2, which moves + // the block to the fall-thru case (BB3 in the code above). + if (case2 && !case1) { + JumpAroundTarget->moveAfter(LayoutSucc); + // only move a block if it doesn't have a fall-thru. otherwise + // the CFG will be incorrect. + if (!UncondTarget->canFallThrough()) { + UncondTarget->moveAfter(JumpAroundTarget); + } + } + + // + // Correct live-in information. Is used by post-RA scheduler + // The live-in to LayoutSucc is now all values live-in to + // JumpAroundTarget. 
+ // + std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(), + LayoutSucc->livein_end()); + std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(), + JumpAroundTarget->livein_end()); + for (unsigned i = 0; i < OrigLiveIn.size(); ++i) { + LayoutSucc->removeLiveIn(OrigLiveIn[i]); + } + for (unsigned i = 0; i < NewLiveIn.size(); ++i) { + LayoutSucc->addLiveIn(NewLiveIn[i]); + } + } + } + } + } + } + } + return true; +} +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) { + return new HexagonCFGOptimizer(TM); +} diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td new file mode 100644 index 0000000..bd9608b --- /dev/null +++ b/lib/Target/Hexagon/HexagonCallingConv.td @@ -0,0 +1,35 @@ +//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the Hexagon architectures. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// Hexagon 32-bit C return-value convention. +def RetCC_Hexagon32 : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; + +// Hexagon 32-bit C Calling convention. +def CC_Hexagon32 : CallingConv<[ + // All arguments get passed in integer registers if there is space. + CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>, + CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>, + + // Alternatively, they are assigned to the stack in 4-byte aligned units. + CCAssignToStack<4, 4> +]>; diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp new file mode 100644 index 0000000..2e51dbf --- /dev/null +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -0,0 +1,207 @@ +//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Hexagon_CCState class, used for lowering and +// implementing calling conventions. 
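Under CC_Hexagon32 just above, 32-bit scalars fill R0..R5 in order, an i64 takes the first free double register, and anything left over lands in 4-byte aligned stack slots. The following toy model of that greedy assignment is purely illustrative, not LLVM code, and assumes the usual Hexagon pairing Dn = R(2n+1):R(2n), which is what makes D0 unavailable once R0 or R1 is taken:

  #include <bitset>
  #include <cstdio>

  int main() {
    std::bitset<6> UsedR;                // tracks R0..R5
    auto takeR = [&]() -> int {          // next free 32-bit register
      for (int i = 0; i < 6; ++i)
        if (!UsedR[i]) { UsedR[i] = true; return i; }
      return -1;                         // would be a 4-byte stack slot
    };
    auto takeD = [&]() -> int {          // next free pair among D0..D2
      for (int d = 0; d < 3; ++d)
        if (!UsedR[2 * d] && !UsedR[2 * d + 1]) {
          UsedR[2 * d] = UsedR[2 * d + 1] = true;
          return d;
        }
      return -1;                         // pair spills to the stack
    };
    // f(i32 a, i32 b, i64 c, i32 d): prints "R0 R1 D1 R4".
    std::printf("R%d R%d D%d R%d\n", takeR(), takeR(), takeD(), takeR());
    return 0;
  }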
Adapted from the machine independent +// version of the class (CCState) but this handles calls to varargs functions +// +//===----------------------------------------------------------------------===// + +#include "HexagonCallingConvLower.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "Hexagon.h" +using namespace llvm; + +Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, + const TargetMachine &tm, + SmallVector<CCValAssign, 16> &locs, + LLVMContext &c) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(c) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset, + LocVT.getSimpleVT(), LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void Hexagon_CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// incorporating info about the formals into this state. +void +Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> + &Ins, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + unsigned NumArgs = Ins.size(); + unsigned i = 0; + + // If the function returns a small struct in registers, skip + // over the first (dummy) argument. + if (SretValueInRegs != 0) { + ++i; + } + + + for (; i != NumArgs; ++i) { + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) { + dbgs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// incorporating info about the result values into this state. +void +Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + + // For Hexagon, Return small structures in registers. + if (SretValueInRegs != 0) { + if (SretValueInRegs <= 32) { + unsigned Reg = Hexagon::R0; + addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32, + CCValAssign::Full)); + return; + } + if (SretValueInRegs <= 64) { + unsigned Reg = Hexagon::D0; + addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64, + CCValAssign::Full)); + return; + } + } + + + // Determine which register each value should be copied into. 
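MarkAllocated above packs the register set into 32-bit words, bit Reg%32 of word Reg/32, and walks the alias set so that claiming one register also poisons every register that overlaps it. The indexing, as a self-contained sketch with the types simplified (this is not the LLVM class):

  #include <cstdint>
  #include <vector>

  struct RegBitmap {
    std::vector<uint32_t> Words;
    explicit RegBitmap(unsigned NumRegs) : Words((NumRegs + 31) / 32) {}
    void mark(unsigned Reg) { Words[Reg / 32] |= 1u << (Reg % 32); }
    bool test(unsigned Reg) const {
      return (Words[Reg / 32] >> (Reg % 32)) & 1u;
    }
  };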
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){ + dbgs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } + } +} + + +/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info +/// about the passed values into this state. +void +Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> + &Outs, + Hexagon_CCAssignFn Fn, + int NonVarArgsParams, + unsigned SretValueSize) { + unsigned NumOps = Outs.size(); + + unsigned i = 0; + // If the called function returns a small struct in registers, skip + // the first actual parameter. We do not want to pass a pointer to + // the stack location. + if (SretValueSize != 0) { + ++i; + } + + for (; i != NumOps; ++i) { + EVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, + NonVarArgsParams, i+1, false)) { + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void +Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + Hexagon_CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1, + false)) { + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// incorporating info about the passed values into this state. +void +Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, + unsigned SretValueInRegs) { + + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) { + dbgs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1, + false)) { + dbgs() << "Call result has unhandled type " + << VT.getEVTString() << "\n"; + abort(); + } +} diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h new file mode 100644 index 0000000..1f601e8 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCallingConvLower.h @@ -0,0 +1,189 @@ +//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Hexagon_CCState class, used for lowering +// and implementing calling conventions. 
Adapted from the target independent +// version but this handles calls to varargs functions +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H +#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/CallingConvLower.h" + +// +// Need to handle varargs. +// +namespace llvm { + class TargetRegisterInfo; + class TargetMachine; + class Hexagon_CCState; + class SDNode; + + +/// Hexagon_CCAssignFn - This function assigns a location for Val, updating +/// State to reflect the change. +typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State, + int NonVarArgsParams, + int CurrentParam, + bool ForceMem); + + +/// CCState - This class holds information needed while lowering arguments and +/// return values. It captures which registers are already assigned and which +/// stack slots are used. It provides accessors to allocate these values. +class Hexagon_CCState { + CallingConv::ID CallingConv; + bool IsVarArg; + const TargetMachine &TM; + const TargetRegisterInfo &TRI; + SmallVector<CCValAssign, 16> &Locs; + LLVMContext &Context; + + unsigned StackOffset; + SmallVector<uint32_t, 16> UsedRegs; +public: + Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, + SmallVector<CCValAssign, 16> &locs, LLVMContext &c); + + void addLoc(const CCValAssign &V) { + Locs.push_back(V); + } + + LLVMContext &getContext() const { return Context; } + const TargetMachine &getTarget() const { return TM; } + unsigned getCallingConv() const { return CallingConv; } + bool isVarArg() const { return IsVarArg; } + + unsigned getNextStackOffset() const { return StackOffset; } + + /// isAllocated - Return true if the specified register (or an alias) is + /// allocated. + bool isAllocated(unsigned Reg) const { + return UsedRegs[Reg/32] & (1 << (Reg&31)); + } + + /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, + /// incorporating info about the formals into this state. + void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeReturn - Analyze the returned values of an ISD::RET node, + /// incorporating info about the result values into this state. + void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info + /// about the passed values into this state. + void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + Hexagon_CCAssignFn Fn, int NonVarArgsParams, + unsigned SretValueSize); + + /// AnalyzeCallOperands - Same as above except it takes vectors of types + /// and argument flags. + void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + Hexagon_CCAssignFn Fn); + + /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, + /// incorporating info about the passed values into this state. + void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + Hexagon_CCAssignFn Fn, unsigned SretValueInRegs); + + /// AnalyzeCallResult - Same as above except it's specialized for calls which + /// produce a single value. 
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn); + + /// getFirstUnallocated - Return the first unallocated register in the set, or + /// NumRegs if they are all allocated. + unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const { + for (unsigned i = 0; i != NumRegs; ++i) + if (!isAllocated(Regs[i])) + return i; + return NumRegs; + } + + /// AllocateReg - Attempt to allocate one register. If it is not available, + /// return zero. Otherwise, return the register, marking it and any aliases + /// as allocated. + unsigned AllocateReg(unsigned Reg) { + if (isAllocated(Reg)) return 0; + MarkAllocated(Reg); + return Reg; + } + + /// Version of AllocateReg with extra register to be shadowed. + unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) { + if (isAllocated(Reg)) return 0; + MarkAllocated(Reg); + MarkAllocated(ShadowReg); + return Reg; + } + + /// AllocateReg - Attempt to allocate one of the specified registers. If none + /// are available, return zero. Otherwise, return the first one available, + /// marking it and any aliases as allocated. + unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) { + unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); + if (FirstUnalloc == NumRegs) + return 0; // Didn't find the reg. + + // Mark the register and any aliases as allocated. + unsigned Reg = Regs[FirstUnalloc]; + MarkAllocated(Reg); + return Reg; + } + + /// Version of AllocateReg with list of registers to be shadowed. + unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs, + unsigned NumRegs) { + unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs); + if (FirstUnalloc == NumRegs) + return 0; // Didn't find the reg. + + // Mark the register and any aliases as allocated. + unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc]; + MarkAllocated(Reg); + MarkAllocated(ShadowReg); + return Reg; + } + + /// AllocateStack - Allocate a chunk of stack space with the specified size + /// and alignment. + unsigned AllocateStack(unsigned Size, unsigned Align) { + assert(Align && ((Align-1) & Align) == 0); // Align is power of 2. + StackOffset = ((StackOffset + Align-1) & ~(Align-1)); + unsigned Result = StackOffset; + StackOffset += Size; + return Result; + } + + // HandleByVal - Allocate a stack slot large enough to pass an argument by + // value. The size and alignment information of the argument is encoded in its + // parameter attribute. + void HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags); + +private: + /// MarkAllocated - Mark a register and all of its aliases as allocated. + void MarkAllocated(unsigned Reg); +}; + + + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp new file mode 100644 index 0000000..cb73ae0 --- /dev/null +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -0,0 +1,184 @@ +//===--- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===//// +// The Hexagon processor has no instructions that load or store predicate +// registers directly. 
So, when these registers must be spilled, a
+// general-purpose register must be found and the value moved between it and
+// the predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <map>
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+
+using namespace llvm;
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+    HexagonTargetMachine& QTM;
+    const HexagonSubtarget &QST;
+
+ public:
+    static char ID;
+    HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
+      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+    const char *getPassName() const {
+      return "Hexagon Expand Predicate Spill Code";
+    }
+    bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+  const HexagonInstrInfo *TII = QTM.getInstrInfo();
+  const HexagonRegisterInfo *RegInfo = QTM.getRegisterInfo();
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+    // Traverse the basic block.
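+    // Each predicate spill (STriw_pred) or restore (LDriw_pred) pseudo found
+    // below is rewritten into a transfer through one reserved general-purpose
+    // register; the second reserved register supplies address arithmetic when
+    // the frame offset is out of range for the memory instruction.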
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + int Opc = MI->getOpcode(); + if (Opc == Hexagon::STriw_pred) { + // STriw_pred [R30], ofst, SrcReg; + unsigned FP = MI->getOperand(0).getReg(); + assert(FP == RegInfo->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(1).isImm() && "Not an offset"); + int Offset = MI->getOperand(1).getImm(); + int SrcReg = MI->getOperand(2).getReg(); + assert(Hexagon::PredRegsRegClass.contains(SrcReg) && + "Not a predicate register"); + if (!TII->isValidOffset(Hexagon::STriw, Offset)) { + if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr), + HEXAGON_RESERVED_REG_1) + .addReg(FP).addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::STriw)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0) + .addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd), + HEXAGON_RESERVED_REG_2).addReg(SrcReg); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)). + addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } else if (Opc == Hexagon::LDriw_pred) { + // DstReg = LDriw_pred [R30], ofst. 
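+        // Mirrors the store case above: the word is loaded into a reserved
+        // register and then transferred into the predicate register.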
+ int DstReg = MI->getOperand(0).getReg(); + assert(Hexagon::PredRegsRegClass.contains(DstReg) && + "Not a predicate register"); + unsigned FP = MI->getOperand(1).getReg(); + assert(FP == RegInfo->getFrameRegister() && + "Not a Frame Pointer, Nor a Spill Slot"); + assert(MI->getOperand(2).isImm() && "Not an offset"); + int Offset = MI->getOperand(2).getImm(); + if (!TII->isValidOffset(Hexagon::LDriw, Offset)) { + if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MBB, MII, MI->getDebugLoc(), + TII->get(Hexagon::CONST32_Int_Real), + HEXAGON_RESERVED_REG_1).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr), + HEXAGON_RESERVED_REG_1) + .addReg(FP) + .addReg(HEXAGON_RESERVED_REG_1); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri), + HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2) + .addReg(HEXAGON_RESERVED_REG_1) + .addImm(0); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + } else { + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw), + HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset); + BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs), + DstReg).addReg(HEXAGON_RESERVED_REG_2); + } + MII = MBB->erase(MI); + --MII; + } + } + } + + return true; +} + +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) { + return new HexagonExpandPredSpillCode(TM); +} diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp new file mode 100644 index 0000000..78e0b1c --- /dev/null +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -0,0 +1,333 @@ +//==-- HexagonFrameLowering.cpp - Define frame lowering --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +// +//===----------------------------------------------------------------------===// +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" +#include "HexagonFrameLowering.h" + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <iostream> + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Function.h" +using namespace llvm; + +static cl::opt<bool> DisableDeallocRet( + "disable-hexagon-dealloc-ret", + cl::Hidden, + cl::desc("Disable Dealloc Return for Hexagon target")); + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target. + unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign); + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = RoundUpToAlignment(FrameSize, TargetAlign); + + // Update frame info. + MFI->setStackSize(FrameSize); +} + + +void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo()); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + determineFrameLayout(MF); + + // Check if frame moves are needed for EH. + bool needsFrameMoves = MMI.hasDebugInfo() || + !MF.getFunction()->needsUnwindTableEntry(); + + // Get the number of bytes to allocate from the FrameInfo. + int NumBytes = (int) MFI->getStackSize(); + + // LLVM expects allocframe not to be the first instruction in the + // basic block. + MachineBasicBlock::iterator InsertPt = MBB.begin(); + + // + // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset. 
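+  // The maximum call frame size is only final at this point, so the
+  // placeholder offsets in those nodes are patched here.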
+  //
+  HexagonMachineFunctionInfo *FuncInfo =
+    MF.getInfo<HexagonMachineFunctionInfo>();
+  const std::vector<MachineInstr*>& AdjustRegs =
+    FuncInfo->getAllocaAdjustInsts();
+  for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+         e = AdjustRegs.end();
+       i != e; ++i) {
+    MachineInstr* MI = *i;
+    assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+           "Expected adjust alloca node");
+
+    MachineOperand& MO = MI->getOperand(2);
+    assert(MO.isImm() && "Expected immediate");
+    MO.setImm(MFI->getMaxCallFrameSize());
+  }
+
+  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+  if (needsFrameMoves) {
+    // Advance CFA. DW_CFA_def_cfa
+    unsigned FPReg = QRI->getFrameRegister();
+    unsigned RAReg = QRI->getRARegister();
+
+    MachineLocation Dst(MachineLocation::VirtualFP);
+    MachineLocation Src(FPReg, -8);
+    Moves.push_back(MachineMove(0, Dst, Src));
+
+    // R31 = (R31 - #4)
+    MachineLocation LRDst(RAReg, -4);
+    MachineLocation LRSrc(RAReg);
+    Moves.push_back(MachineMove(0, LRDst, LRSrc));
+
+    // R30 = (R30 - #8)
+    MachineLocation SPDst(FPReg, -8);
+    MachineLocation SPSrc(FPReg);
+    Moves.push_back(MachineMove(0, SPDst, SPSrc));
+  }
+
+  //
+  // Only insert ALLOCFRAME if we need to.
+  //
+  if (hasFP(MF)) {
+    // Check for overflow.
+    // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+    const int ALLOCFRAME_MAX = 16384;
+    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+    if (NumBytes >= ALLOCFRAME_MAX) {
+      // Emit allocframe(#0).
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+      // Subtract the frame size from the stack pointer.
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+              HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+              QRI->getStackRegister()).
+        addReg(QRI->getStackRegister()).
+        addReg(HEXAGON_RESERVED_REG_1);
+    } else {
+      BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+    }
+  }
+}
+
+// Returns true if MBB has a machine instruction that indicates a tail call
+// in the block.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  unsigned RetOpcode = MBBI->getOpcode();
+
+  return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+                                        MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  DebugLoc dl = MBBI->getDebugLoc();
+  //
+  // Only insert deallocframe if we need to.
+  //
+  if (hasFP(MF)) {
+    MachineBasicBlock::iterator MBBI = prior(MBB.end());
+    MachineBasicBlock::iterator MBBI_end = MBB.end();
+    //
+    // For Hexagon, we don't need the frame size.
+    //
+    MachineFrameInfo *MFI = MF.getFrameInfo();
+    int NumBytes = (int) MFI->getStackSize();
+
+    const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+    // Replace the 'jumpr r31' instruction with dealloc_return for V4 and
+    // higher versions.
+    if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+        && !DisableDeallocRet) {
+      // Remove the jumpr node.
+      MBB.erase(MBBI);
+      // Add dealloc_return.
+      BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4))
+        .addImm(NumBytes);
+    } else { // Add deallocframe for V2 and V3.
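+      // deallocframe restores the saved LR:FP pair and releases the stack
+      // frame.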
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)).addImm(NumBytes); + } + } +} + +bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const HexagonMachineFunctionInfo *FuncInfo = + MF.getInfo<HexagonMachineFunctionInfo>(); + return (MFI->hasCalls() || (MFI->getStackSize() > 0) || + FuncInfo->hasClobberLR() ); +} + +bool +HexagonFrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + if (CSI.empty()) { + return false; + } + + // We can only schedule double loads if we spill contiguous callee-saved regs + // For instance, we cannot scheduled double-word loads if we spill r24, + // r26, and r27. + // Hexagon_TODO: We can try to double-word align odd registers for -O2 and + // above. + bool ContiguousRegs = true; + + for (unsigned i = 0; i < CSI.size(); ++i) { + unsigned Reg = CSI[i].getReg(); + + // + // Check if we can use a double-word store. + // + const unsigned* SuperReg = TRI->getSuperRegisters(Reg); + + // Assume that there is exactly one superreg. + assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); + bool CanUseDblStore = false; + const TargetRegisterClass* SuperRegClass = 0; + + if (ContiguousRegs && (i < CSI.size()-1)) { + const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); + assert(SuperRegNext[0] && !SuperRegNext[1] && + "Expected exactly one superreg"); + SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); + CanUseDblStore = (SuperRegNext[0] == SuperReg[0]); + } + + + if (CanUseDblStore) { + TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true, + CSI[i+1].getFrameIdx(), SuperRegClass, TRI); + MBB.addLiveIn(SuperReg[0]); + ++i; + } else { + // Cannot use a double-word store. + ContiguousRegs = false; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC, + TRI); + MBB.addLiveIn(Reg); + } + } + return true; +} + + +bool HexagonFrameLowering::restoreCalleeSavedRegisters( + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + + MachineFunction *MF = MBB.getParent(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + if (CSI.empty()) { + return false; + } + + // We can only schedule double loads if we spill contiguous callee-saved regs + // For instance, we cannot scheduled double-word loads if we spill r24, + // r26, and r27. + // Hexagon_TODO: We can try to double-word align odd registers for -O2 and + // above. + bool ContiguousRegs = true; + + for (unsigned i = 0; i < CSI.size(); ++i) { + unsigned Reg = CSI[i].getReg(); + + // + // Check if we can use a double-word load. + // + const unsigned* SuperReg = TRI->getSuperRegisters(Reg); + const TargetRegisterClass* SuperRegClass = 0; + + // Assume that there is exactly one superreg. 
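+    // (On Hexagon, each 32-bit register belongs to exactly one doubleword
+    // pair, so the alias set yields a single super-register.)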
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg"); + bool CanUseDblLoad = false; + if (ContiguousRegs && (i < CSI.size()-1)) { + const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg()); + assert(SuperRegNext[0] && !SuperRegNext[1] && + "Expected exactly one superreg"); + SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]); + CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]); + } + + + if (CanUseDblLoad) { + TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(), + SuperRegClass, TRI); + MBB.addLiveIn(SuperReg[0]); + ++i; + } else { + // Cannot use a double-word load. + ContiguousRegs = false; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); + MBB.addLiveIn(Reg); + } + } + return true; +} + +int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + return MF.getFrameInfo()->getObjectOffset(FI); +} diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h new file mode 100644 index 0000000..ad87f11 --- /dev/null +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -0,0 +1,50 @@ +//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGON_FRAMEINFO_H +#define HEXAGON_FRAMEINFO_H + +#include "Hexagon.h" +#include "HexagonSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + +class HexagonFrameLowering : public TargetFrameLowering { +private: + const HexagonSubtarget &STI; + void determineFrameLayout(MachineFunction &MF) const; + +public: + explicit HexagonFrameLowering(const HexagonSubtarget &sti) + : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + virtual bool + spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + virtual bool + restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; + bool hasFP(const MachineFunction &MF) const; + bool hasTailCall(MachineBasicBlock &MBB) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp new file mode 100644 index 0000000..c1abc4a --- /dev/null +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -0,0 +1,644 @@ +//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass identifies loops where we can generate the Hexagon hardware +// loop instruction. The hardware loop can perform loop branches with a +// zero-cycle overhead. 
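To make that concrete, here is the shape of source loop the pass looks for, with the intended Hexagon mapping sketched in the comments. This is an illustrative sketch, not part of the patch, and the assembly mnemonics shown are schematic:

```cpp
// A countable loop with a unit-step induction variable and no calls: the
// compare, increment, and branch bookkeeping can be replaced by the
// loop0/endloop0 hardware-loop mechanism described above.
int sumArray(const int *A, int N) {
  int Sum = 0;
  for (int i = 0; i < N; ++i)   // becomes: loop0(.Lbody, N) -- sets SA0/LC0
    Sum += A[i];                // .Lbody: the loop body itself
  return Sum;                   // end of the body is marked with :endloop0
}
```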
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+//  - Countable loops (w/ ind. var for a trip count).
+//  - Assumes loops are normalized by IndVarSimplify.
+//  - Try inner-most loops first.
+//  - No nested hardware loops.
+//  - No function calls in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hwloops"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+  class CountValue;
+  struct HexagonHardwareLoops : public MachineFunctionPass {
+    MachineLoopInfo       *MLI;
+    MachineRegisterInfo   *MRI;
+    const TargetInstrInfo *TII;
+
+  public:
+    static char ID;   // Pass identification, replacement for typeid
+
+    HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const { return "Hexagon Hardware Loops"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// getCanonicalInductionVariable - Check to see if the loop has a
+    /// canonical induction variable.
+    /// Should be defined in MachineLoop. Based upon version in class Loop.
+    const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
+
+    /// getTripCount - Return a loop-invariant LLVM register indicating the
+    /// number of times the loop will be executed. If the trip count cannot
+    /// be determined, this returns null.
+    CountValue *getTripCount(MachineLoop *L) const;
+
+    /// isInductionOperation - Return true if the instruction matches the
+    /// pattern for an operation that defines an induction variable.
+    bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+    /// isInvalidLoopOperation - Return true if the instruction is not valid
+    /// within a hardware loop.
+    bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+    /// containsInvalidInstruction - Return true if the loop contains an
+    /// instruction that inhibits using the hardware loop.
+    bool containsInvalidInstruction(MachineLoop *L) const;
+
+    /// convertToHardwareLoop - Given a loop, check if we can convert it to a
+    /// hardware loop. If so, then perform the conversion and return true.
+    bool convertToHardwareLoop(MachineLoop *L);
+
+  };
+
+  char HexagonHardwareLoops::ID = 0;
+
+
+  // CountValue class - Abstraction for a trip count of a loop. A
+  // smaller version of the MachineOperand class without the concerns
+  // of changing the operand representation.
+  class CountValue {
+  public:
+    enum CountValueType {
+      CV_Register,
+      CV_Immediate
+    };
+  private:
+    CountValueType Kind;
+    union Values {
+      unsigned RegNum;
+      int64_t ImmVal;
+      Values(unsigned r) : RegNum(r) {}
+      Values(int64_t i) : ImmVal(i) {}
+    } Contents;
+    bool isNegative;
+
+  public:
+    CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+                                       isNegative(neg) {}
+    explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+                                     isNegative(i < 0) {}
+    CountValueType getType() const { return Kind; }
+    bool isReg() const { return Kind == CV_Register; }
+    bool isImm() const { return Kind == CV_Immediate; }
+    bool isNeg() const { return isNegative; }
+
+    unsigned getReg() const {
+      assert(isReg() && "Wrong CountValue accessor");
+      return Contents.RegNum;
+    }
+    void setReg(unsigned Val) {
+      Contents.RegNum = Val;
+    }
+    int64_t getImm() const {
+      assert(isImm() && "Wrong CountValue accessor");
+      if (isNegative) {
+        return -Contents.ImmVal;
+      }
+      return Contents.ImmVal;
+    }
+    void setImm(int64_t Val) {
+      Contents.ImmVal = Val;
+    }
+
+    void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+      if (isReg()) { OS << PrintReg(getReg()); }
+      if (isImm()) { OS << getImm(); }
+    }
+  };
+
+  struct HexagonFixupHwLoops : public MachineFunctionPass {
+  public:
+    static char ID;     // Pass identification, replacement for typeid.
+
+    HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    /// Maximum distance between the loop instr and the basic block.
+    /// Just an estimate.
+    static const unsigned MAX_LOOP_DISTANCE = 200;
+
+    /// fixupLoopInstrs - Check the offset between each loop instruction and
+    /// the loop basic block to determine if we can use the LOOP instruction
+    /// or if we need to set the LC/SA registers explicitly.
+    bool fixupLoopInstrs(MachineFunction &MF);
+
+    /// convertLoopInstr - Add the instructions to set the LC and SA registers
+    /// explicitly.
+    void convertLoopInstr(MachineFunction &MF,
+                          MachineBasicBlock::iterator &MII,
+                          RegScavenger &RS);
+
+  };
+
+  char HexagonFixupHwLoops::ID = 0;
+
+} // end anonymous namespace
+
+
+/// isHardwareLoop - Returns true if the instruction is a hardware loop
+/// instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+  return MI->getOpcode() == Hexagon::LOOP0_r ||
+    MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI) {
+  return MI->getOpcode() == Hexagon::CMPEQri;
+}
+
+
+/// createHexagonHardwareLoops - Factory for creating
+/// the hardware loop phase.
+FunctionPass *llvm::createHexagonHardwareLoops() {
+  return new HexagonHardwareLoops();
+}
+
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+  bool Changed = false;
+
+  // Get the loop information.
+  MLI = &getAnalysis<MachineLoopInfo>();
+  // Get the register information.
+  MRI = &MF.getRegInfo();
+  // Get the target-specific instruction info.
+  TII = MF.getTarget().getInstrInfo();
+
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+       I != E; ++I) {
+    MachineLoop *L = *I;
+    if (!L->getParentLoop()) {
+      Changed |= convertToHardwareLoop(L);
+    }
+  }
+
+  return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+const MachineInstr
+*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+  MachineBasicBlock *TopMBB = L->getTopBlock();
+  MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+  assert(PI != TopMBB->pred_end() &&
+         "Loop must have more than one incoming edge!");
+  MachineBasicBlock *Backedge = *PI++;
+  if (PI == TopMBB->pred_end()) return 0;  // dead loop
+  MachineBasicBlock *Incoming = *PI++;
+  if (PI != TopMBB->pred_end()) return 0;  // multiple backedges?
+
+  // Make sure there is one incoming edge and one backedge and determine
+  // which is which.
+  if (L->contains(Incoming)) {
+    if (L->contains(Backedge))
+      return 0;
+    std::swap(Incoming, Backedge);
+  } else if (!L->contains(Backedge))
+    return 0;
+
+  // Loop over all of the PHI nodes, looking for a canonical induction
+  // variable:
+  //   - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+  //   - The recurrence comes from the backedge.
+  //   - The definition is an induction operation.
+  for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+       I != E && I->isPHI(); ++I) {
+    const MachineInstr *MPhi = &*I;
+    unsigned DefReg = MPhi->getOperand(0).getReg();
+    for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+      // Check each operand for the value from the backedge.
+      MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+      if (L->contains(MBB)) {  // operand comes from the backedge
+        // Check if the definition is an induction operation.
+        const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+        if (isInductionOperation(DI, DefReg)) {
+          return MPhi;
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
+  // Check that the loop has an induction variable.
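+  // The phi supplies the initial value and links to the update instruction;
+  // the compare located below supplies the bound.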
+  const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
+  if (IV_Inst == 0) return 0;
+
+  // Canonical loops will end with a 'cmpeq_ri IV, Imm':
+  //   if Imm is 0, get the count from the PHI operand;
+  //   if Imm is -M, then M is the count;
+  //   otherwise, Imm is the count.
+  const MachineOperand *IV_Opnd;
+  const MachineOperand *InitialValue;
+  if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+    InitialValue = &IV_Inst->getOperand(1);
+    IV_Opnd = &IV_Inst->getOperand(3);
+  } else {
+    InitialValue = &IV_Inst->getOperand(3);
+    IV_Opnd = &IV_Inst->getOperand(1);
+  }
+
+  // Look for the cmp instruction to determine if we
+  // can get a useful trip count. The trip count can
+  // be either a register or an immediate. The location
+  // of the value depends upon the type (reg or imm).
+  while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+    const MachineInstr *MI = IV_Opnd->getParent();
+    if (L->contains(MI) && isCompareEqualsImm(MI)) {
+      const MachineOperand &MO = MI->getOperand(2);
+      assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+      int64_t ImmVal = MO.getImm();
+
+      const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+      assert(L->contains(IV_DefInstr->getParent()) &&
+             "IV definition should occur in loop");
+      int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
+
+      if (ImmVal == 0) {
+        // Make sure the induction variable changes by one on each iteration.
+        if (iv_value != 1 && iv_value != -1) {
+          return 0;
+        }
+        return new CountValue(InitialValue->getReg(), iv_value > 0);
+      } else {
+        assert(InitialValue->isReg() && "Expecting register for init value");
+        const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
+        if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
+          int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
+          if ((count % iv_value) != 0) {
+            return 0;
+          }
+          return new CountValue(count/iv_value);
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+/// isInductionOperation - Return true if the operation matches the
+/// pattern that defines an induction variable:
+///    add iv, c
+///
+bool
+HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
+                                           unsigned IVReg) const {
+  return (MI->getOpcode() == Hexagon::ADD_ri &&
+          MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within a
+/// hardware loop.
+bool
+HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+  // A call is not allowed because the callee may use a hardware loop.
+  if (MI->getDesc().isCall()) {
+    return true;
+  }
+  // Do not allow nested hardware loops.
+  if (isHardwareLoop(MI)) {
+    return true;
+  }
+  // Check if the instruction defines a hardware loop register.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isDef() &&
+        (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
+         MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA1)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the hardware loop function.
+///
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+  const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = Blocks[i];
+    for (MachineBasicBlock::iterator
+           MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+      const MachineInstr *MI = &*MII;
+      if (isInvalidLoopOperation(MI)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// convertToHardwareLoop - Check if the loop is a candidate for
+/// converting to a hardware loop. If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first. A loop can
+/// be converted if it is a counting loop; the trip count can be
+/// either a register value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in LLVM.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+  bool Changed = false;
+  // Process nested loops first.
+  for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+    Changed |= convertToHardwareLoop(*I);
+  }
+  // If a nested loop has been converted, then we can't convert this loop.
+  if (Changed) {
+    return Changed;
+  }
+  // Are we able to determine the trip count for the loop?
+  CountValue *TripCount = getTripCount(L);
+  if (TripCount == 0) {
+    return false;
+  }
+  // Does the loop contain any invalid instructions?
+  if (containsInvalidInstruction(L)) {
+    return false;
+  }
+  MachineBasicBlock *Preheader = L->getLoopPreheader();
+  // No preheader means there's no place for the loop instruction.
+  if (Preheader == 0) {
+    return false;
+  }
+  MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+  MachineBasicBlock *LastMBB = L->getExitingBlock();
+  // Don't generate a hardware loop if the loop has more than one exit.
+  if (LastMBB == 0) {
+    return false;
+  }
+  MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+  // Determine the loop start.
+  MachineBasicBlock *LoopStart = L->getTopBlock();
+  if (L->getLoopLatch() != LastMBB) {
+    // When the exit and latch are not the same, use the latch block as the
+    // start.
+    // The loop start address is used only after the 1st iteration, and the
+    // loop latch may contain instructions that need to be executed after the
+    // 1st iteration.
+    LoopStart = L->getLoopLatch();
+    // Make sure the latch is a successor of the exit, otherwise it won't
+    // work.
+    if (!LastMBB->isSuccessor(LoopStart)) {
+      return false;
+    }
+  }
+
+  // Convert the loop to a hardware loop.
+  DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+
+  if (TripCount->isReg()) {
+    // Create a copy of the loop count register.
+    MachineFunction *MF = LastMBB->getParent();
+    const TargetRegisterClass *RC =
+      MF->getRegInfo().getRegClass(TripCount->getReg());
+    unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
+    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+            TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
+    if (TripCount->isNeg()) {
+      unsigned CountReg1 = CountReg;
+      CountReg = MF->getRegInfo().createVirtualRegister(RC);
+      BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+              TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
+    }
+
+    // Add the Loop instruction to the beginning of the loop.
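+    // loop0(start, Rs) initializes SA0 with the loop start address and LC0
+    // with the iteration count held in Rs.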
+    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+            TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+  } else {
+    assert(TripCount->isImm() && "Expecting immediate value for trip count");
+    // Add the Loop immediate instruction to the beginning of the loop.
+    int64_t CountImm = TripCount->getImm();
+    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+            TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+  }
+
+  // Make sure the loop start always has a reference in the CFG. We need to
+  // create a BlockAddress operand to get this mechanism to work; both the
+  // MachineBasicBlock and BasicBlock objects need the flag set.
+  LoopStart->setHasAddressTaken();
+  // This line is needed to set the hasAddressTaken flag on the BasicBlock
+  // object.
+  BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+  // Replace the loop branch with an endloop instruction.
+  DebugLoc dl = LastI->getDebugLoc();
+  BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+
+  // The loop ends with either:
+  //  - a conditional branch followed by an unconditional branch, or
+  //  - a conditional branch to the loop start.
+  if (LastI->getOpcode() == Hexagon::JMP_Pred ||
+      LastI->getOpcode() == Hexagon::JMP_PredNot) {
+    // Delete the conditional branch, and add an unconditional branch out of
+    // the loop if one is needed.
+    MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+    LastI = LastMBB->erase(LastI);
+    if (!L->contains(BranchTarget)) {
+      if (LastI != LastMBB->end()) {
+        TII->RemoveBranch(*LastMBB);
+      }
+      SmallVector<MachineOperand, 0> Cond;
+      TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+    }
+  } else {
+    // Conditional branch to loop start; just delete it.
+    LastMBB->erase(LastI);
+  }
+  delete TripCount;
+
+  ++NumHWLoops;
+  return true;
+}
+
+/// createHexagonFixupHwLoops - Factory for creating the hardware loop
+/// fixup phase.
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+  return new HexagonFixupHwLoops();
+}
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
+
+  bool Changed = fixupLoopInstrs(MF);
+  return Changed;
+}
+
+/// fixupLoopInstrs - For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of each basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+  // Offset of the current instruction from the start.
+  unsigned InstOffset = 0;
+  // Map each basic block to the offset of its first instruction.
+  DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+  // First pass - compute the offset of each basic block.
+  for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+       MBB != MBBe; ++MBB) {
+    BlockToInstOffset[MBB] = InstOffset;
+    InstOffset += (MBB->size() * 4);
+  }
+
+  // Second pass - check each loop instruction to see if it needs to
+  // be converted.
+  InstOffset = 0;
+  bool Changed = false;
+  RegScavenger RS;
+
+  // Loop over all the basic blocks.
+  for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+       MBB != MBBe; ++MBB) {
+    InstOffset = BlockToInstOffset[MBB];
+    RS.enterBasicBlock(MBB);
+
+    // Loop over all the instructions.
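+    // As in the first pass, each instruction is counted as four bytes, so
+    // the distances computed here are estimates.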
+    MachineBasicBlock::iterator MIE = MBB->end();
+    MachineBasicBlock::iterator MII = MBB->begin();
+    while (MII != MIE) {
+      if (isHardwareLoop(MII)) {
+        RS.forward(MII);
+        assert(MII->getOperand(0).isMBB() &&
+               "Expect a basic block as loop operand");
+        int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+        diff = (diff > 0 ? diff : -diff);
+        if ((unsigned)diff > MAX_LOOP_DISTANCE) {
+          // Convert to explicitly setting LC0 and SA0.
+          convertLoopInstr(MF, MII, RS);
+          MII = MBB->erase(MII);
+          Changed = true;
+        } else {
+          ++MII;
+        }
+      } else {
+        ++MII;
+      }
+      InstOffset += 4;
+    }
+  }
+
+  return Changed;
+
+}
+
+/// convertLoopInstr - Convert a loop instruction to a sequence of
+/// instructions that set the LC and SA registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+                                           MachineBasicBlock::iterator &MII,
+                                           RegScavenger &RS) {
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  MachineBasicBlock *MBB = MII->getParent();
+  DebugLoc DL = MII->getDebugLoc();
+  unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0);
+
+  // First, set LC0 with the trip count.
+  if (MII->getOperand(1).isReg()) {
+    // Trip count is a register.
+    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+      .addReg(MII->getOperand(1).getReg());
+  } else {
+    // Trip count is an immediate.
+    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+      .addImm(MII->getOperand(1).getImm());
+    BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+      .addReg(Scratch);
+  }
+  // Then, set SA0 with the loop start address.
+  BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+    .addMBB(MII->getOperand(0).getMBB());
+  BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 0000000..4deab9f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1495 @@
+//==-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon ----==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon-specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+  /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const HexagonSubtarget &Subtarget;
+
+  // Keep a reference to HexagonTargetMachine.
+ HexagonTargetMachine& TM; + const HexagonInstrInfo *TII; + +public: + explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine) + : SelectionDAGISel(targetmachine), + Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()), + TM(targetmachine), + TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) { + + } + + SDNode *Select(SDNode *N); + + // Complex Pattern Selectors. + bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2); + bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2); + + virtual const char *getPassName() const { + return "Hexagon DAG->DAG Pattern Instruction Selection"; + } + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); + bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); + + SDNode *SelectLoad(SDNode *N); + SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl); + SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl); + SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode, + DebugLoc dl); + SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode, + DebugLoc dl); + SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl); + SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl); + SDNode *SelectStore(SDNode *N); + SDNode *SelectSHL(SDNode *N); + SDNode *SelectSelect(SDNode *N); + SDNode *SelectTruncate(SDNode *N); + SDNode *SelectMul(SDNode *N); + SDNode *SelectZeroExtend(SDNode *N); + SDNode *SelectIntrinsicWOChain(SDNode *N); + SDNode *SelectConstant(SDNode *N); + SDNode *SelectAdd(SDNode *N); + + // Include the pieces autogenerated from the target description. +#include "HexagonGenDAGISel.inc" +}; +} // end anonymous namespace + + +/// createHexagonISelDag - This pass converts a legalized DAG into a +/// Hexagon-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) { + return new HexagonDAGToDAGISel(TM); +} + +static bool IsS11_0_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isInt<11>(v); +} + + +static bool IsS11_1_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<11,1>(v); +} + + +static bool IsS11_2_Offset(SDNode * S) { + ConstantSDNode *N = cast<ConstantSDNode>(S); + + // immS16 predicate - True if the immediate fits in a 16-bit sign extended + // field. 
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,2>(v);
+}
+
+
+static bool IsS11_3_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // s11_3 predicate - True if the immediate is a multiple of 8 and, shifted
+  // right by 3, fits in an 11-bit sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,3>(v);
+}
+
+
+static bool IsU6_0_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // u6_0 predicate - True if the immediate fits in a 6-bit unsigned field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<6>(v);
+}
+
+
+static bool IsU6_1_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // u6_1 predicate - True if the immediate is even and, shifted right by 1,
+  // fits in a 6-bit unsigned field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,1>(v);
+}
+
+
+static bool IsU6_2_Offset(SDNode * S) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // u6_2 predicate - True if the immediate is a multiple of 4 and, shifted
+  // right by 2, fits in a 6-bit unsigned field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,2>(v);
+}
+
+
+// Intrinsics that return a predicate.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID)
+{
+  switch (ID) {
+    default:
+      return 0;
+    case Intrinsic::hexagon_C2_cmpeq:
+    case Intrinsic::hexagon_C2_cmpgt:
+    case Intrinsic::hexagon_C2_cmpgtu:
+    case Intrinsic::hexagon_C2_cmpgtup:
+    case Intrinsic::hexagon_C2_cmpgtp:
+    case Intrinsic::hexagon_C2_cmpeqp:
+    case Intrinsic::hexagon_C2_bitsset:
+    case Intrinsic::hexagon_C2_bitsclr:
+    case Intrinsic::hexagon_C2_cmpeqi:
+    case Intrinsic::hexagon_C2_cmpgti:
+    case Intrinsic::hexagon_C2_cmpgtui:
+    case Intrinsic::hexagon_C2_cmpgei:
+    case Intrinsic::hexagon_C2_cmpgeui:
+    case Intrinsic::hexagon_C2_cmplt:
+    case Intrinsic::hexagon_C2_cmpltu:
+    case Intrinsic::hexagon_C2_bitsclri:
+    case Intrinsic::hexagon_C2_and:
+    case Intrinsic::hexagon_C2_or:
+    case Intrinsic::hexagon_C2_xor:
+    case Intrinsic::hexagon_C2_andn:
+    case Intrinsic::hexagon_C2_not:
+    case Intrinsic::hexagon_C2_orn:
+    case Intrinsic::hexagon_C2_pxfer_map:
+    case Intrinsic::hexagon_C2_any8:
+    case Intrinsic::hexagon_C2_all8:
+    case Intrinsic::hexagon_A2_vcmpbeq:
+    case Intrinsic::hexagon_A2_vcmpbgtu:
+    case Intrinsic::hexagon_A2_vcmpheq:
+    case Intrinsic::hexagon_A2_vcmphgt:
+    case Intrinsic::hexagon_A2_vcmphgtu:
+    case Intrinsic::hexagon_A2_vcmpweq:
+    case Intrinsic::hexagon_A2_vcmpwgt:
+    case Intrinsic::hexagon_A2_vcmpwgtu:
+    case Intrinsic::hexagon_C2_tfrrp:
+    case Intrinsic::hexagon_S2_tstbit_i:
+    case Intrinsic::hexagon_S2_tstbit_r:
+      return 1;
+  }
+}
+
+
+// Intrinsics that have predicate operands.
+static unsigned doesIntrinsicContainPredicate(unsigned ID)
+{
+  switch (ID) {
+    default:
+      return 0;
+    case Intrinsic::hexagon_C2_tfrpr:
+      return Hexagon::TFR_RsPd;
+    case Intrinsic::hexagon_C2_and:
+      return Hexagon::AND_pp;
+    case Intrinsic::hexagon_C2_xor:
+      return Hexagon::XOR_pp;
+    case Intrinsic::hexagon_C2_or:
+      return Hexagon::OR_pp;
+    case Intrinsic::hexagon_C2_not:
+      return Hexagon::NOT_pp;
+    case Intrinsic::hexagon_C2_any8:
+      return Hexagon::ANY_pp;
+    case Intrinsic::hexagon_C2_all8:
+      return Hexagon::ALL_pp;
+    case Intrinsic::hexagon_C2_vitpack:
+      return Hexagon::VITPACK_pp;
+    case Intrinsic::hexagon_C2_mask:
+      return Hexagon::MASK_p;
+    case Intrinsic::hexagon_C2_mux:
+      return Hexagon::MUX_rr;
+
+    // Mapping hexagon_C2_muxir to MUX_ri. This is pretty weird - but
+    // that's how it's mapped in q6protos.h.
+    case Intrinsic::hexagon_C2_muxir:
+      return Hexagon::MUX_ri;
+
+    // Mapping hexagon_C2_muxri to MUX_ir. This is pretty weird - but
+    // that's how it's mapped in q6protos.h.
+    case Intrinsic::hexagon_C2_muxri:
+      return Hexagon::MUX_ir;
+
+    case Intrinsic::hexagon_C2_muxii:
+      return Hexagon::MUX_ii;
+    case Intrinsic::hexagon_C2_vmux:
+      return Hexagon::VMUX_prr64;
+    case Intrinsic::hexagon_S2_valignrb:
+      return Hexagon::VALIGN_rrp;
+    case Intrinsic::hexagon_S2_vsplicerb:
+      return Hexagon::VSPLICE_rrp;
+  }
+}
+
+
+static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
+  if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
+    return true;
+  }
+  if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
+    return true;
+  }
+  if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
+    return true;
+  }
+  if (MemType == MVT::i8 && isInt<11>(Offset)) {
+    return true;
+  }
+  return false;
+}
+
+
+//
+// Try to lower loads of GlobalAddresses into base+offset loads. Custom
+// lowering for GlobalAddress nodes has already turned it into a
+// CONST32.
+//
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
+  EVT LoadedVT = LD->getMemoryVT();
+  SDValue Chain = LD->getChain();
+  SDNode* Const32 = LD->getBasePtr().getNode();
+  unsigned Opcode = 0;
+
+  if (Const32->getOpcode() == HexagonISD::CONST32 &&
+      ISD::isNormalLoad(LD)) {
+    SDValue Base = Const32->getOperand(0);
+    EVT LoadedVT = LD->getMemoryVT();
+    int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+    if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
+      MVT PointerTy = TLI.getPointerTy();
+      const GlobalValue* GV =
+        cast<GlobalAddressSDNode>(Base)->getGlobal();
+      SDValue TargAddr =
+        CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+      SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+                                               dl, PointerTy,
+                                               TargAddr);
+      // Figure out the base + offset opcode.
+      if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
+      else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
+      else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
+      else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+      else assert (0 && "unknown memory type");
+
+      // Build indexed load.
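+      // For example, a load from (globaladdress + 8) becomes CONST32_set
+      // of the global feeding an LDri*_indexed with immediate offset 8.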
+ SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, + SDValue(NewBase,0), + TargetConstOff, + Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + ReplaceUses(LD, Result); + return Result; + } + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD, + unsigned Opcode, + DebugLoc dl) +{ + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + SDValue N1 = LD->getOperand(1); + SDValue CPTmpN1_0; + SDValue CPTmpN1_1; + if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && + N1.getNode()->getValueType(0) == MVT::i32) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Base, TargetConst, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64, + SDValue(Result_1, 0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Base, TargetConst0, + Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, + MVT::i64, SDValue(Result_1, 0)); + SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, + MVT::i32, Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_2, 0), + SDValue(Result_3, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_2; + } + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD, + unsigned Opcode, + DebugLoc dl) +{ + SDValue Chain = LD->getChain(); + EVT LoadedVT = LD->getMemoryVT(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + SDValue N1 = LD->getOperand(1); + SDValue CPTmpN1_0; + SDValue CPTmpN1_1; + if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) && + N1.getNode()->getValueType(0) == MVT::i32) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + 
TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2,0), + SDValue(Result_1,0)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_3, 0), + SDValue(Result_1, 1), + SDValue(Result_1, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result_3; + } + + // Generate an indirect load. + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, + Base, TargetConst0, Chain); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32, + TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2,0), + SDValue(Result_1,0)); + // Add offset to base. + SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_3, 0), // Load value. + SDValue(Result_4, 0), // New address. + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_3; + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Offset = LD->getOffset(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT LoadedVT = LD->getMemoryVT(); + unsigned Opcode = 0; + + // Check for zero ext loads. + bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD); + + // Figure out the opcode. + if (LoadedVT == MVT::i64) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::POST_LDrid; + else + Opcode = Hexagon::LDrid; + } else if (LoadedVT == MVT::i32) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = Hexagon::POST_LDriw; + else + Opcode = Hexagon::LDriw; + } else if (LoadedVT == MVT::i16) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih; + else + Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih; + } else if (LoadedVT == MVT::i8) { + if (TII->isValidAutoIncImm(LoadedVT, Val)) + Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib; + else + Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib; + } else + assert (0 && "unknown memory type"); + + // For zero ext i64 loads, we need to add combine instructions. + if (LD->getValueType(0) == MVT::i64 && + LD->getExtensionType() == ISD::ZEXTLOAD) { + return SelectIndexedLoadZeroExtend64(LD, Opcode, dl); + } + if (LD->getValueType(0) == MVT::i64 && + LD->getExtensionType() == ISD::SEXTLOAD) { + // Handle sign ext i64 loads. 
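+    // For example, a post-incremented sextload i64 from an i32 location is
+    // emitted as a 32-bit load plus SXTW; the base update is either folded
+    // into the load or emitted as a separate ADD_ri.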
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl); + } + if (TII->isValidAutoIncImm(LoadedVT, Val)) { + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::i32, MVT::Other, Base, + TargetConstVal, Chain); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result, 0), + SDValue(Result, 1), + SDValue(Result, 2) + }; + ReplaceUses(Froms, Tos, 3); + return Result; + } else { + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32); + SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + MVT::Other, Base, TargetConst0, + Chain); + SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32, + Base, TargetConstVal, + SDValue(Result_1, 1)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = LD->getMemOperand(); + cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1); + const SDValue Froms[] = { SDValue(LD, 0), + SDValue(LD, 1), + SDValue(LD, 2) + }; + const SDValue Tos[] = { SDValue(Result_1, 0), + SDValue(Result_2, 0), + SDValue(Result_1, 1) + }; + ReplaceUses(Froms, Tos, 3); + return Result_1; + } + + return SelectCode(LD); +} + + +SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) { + SDNode *result; + DebugLoc dl = N->getDebugLoc(); + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + + // Handle indexed loads. + if (AM != ISD::UNINDEXED) { + result = SelectIndexedLoad(LD, dl); + } else { + result = SelectBaseOffsetLoad(LD, dl); + } + + return result; +} + + +SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) { + SDValue Chain = ST->getChain(); + SDValue Base = ST->getBasePtr(); + SDValue Offset = ST->getOffset(); + SDValue Value = ST->getValue(); + SDNode *OffsetNode = Offset.getNode(); + // Get the constant value. + int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue(); + EVT StoredVT = ST->getMemoryVT(); + + // Offset value must be within representable range + // and must have correct alignment properties. + if (TII->isValidAutoIncImm(StoredVT, Val)) { + SDValue Ops[] = { Value, Base, + CurDAG->getTargetConstant(Val, MVT::i32), Chain}; + unsigned Opcode = 0; + + // Figure out the post inc version of opcode. + if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri; + else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri; + else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri; + else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri; + else assert (0 && "unknown memory type"); + + // Build post increment store. + SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32, + MVT::Other, Ops, 4); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = ST->getMemOperand(); + cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); + + ReplaceUses(ST, Result); + ReplaceUses(SDValue(ST,1), SDValue(Result,1)); + return Result; + } + + // Note: Order of operands matches the def of instruction: + // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ... + // and it differs for POST_ST* for instance. 
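+  // For example, STrid below takes (Base, Offset, Value), while POST_STdri
+  // above takes (Value, Base, Offset) and also defines the updated base
+  // register as its first result.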
+  SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+                    Chain};
+  unsigned Opcode = 0;
+
+  // Figure out the opcode.
+  if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+  else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw;
+  else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+  else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+  else assert (0 && "unknown memory type");
+
+  // Build the regular store.
+  SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
+                                            4);
+  // Build the split increment instruction.
+  SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+                                            Base,
+                                            TargetConstVal,
+                                            SDValue(Result_1, 0));
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = ST->getMemOperand();
+  cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+  ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+  ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+  return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+                                                   DebugLoc dl) {
+  SDValue Chain = ST->getChain();
+  SDNode* Const32 = ST->getBasePtr().getNode();
+  SDValue Value = ST->getValue();
+  unsigned Opcode = 0;
+
+  // Try to lower stores of GlobalAddresses into indexed stores. Custom
+  // lowering for GlobalAddress nodes has already turned it into a
+  // CONST32. Truncating stores are avoided here for the moment, as they
+  // are in the post-inc path; there is no known reason for the
+  // restriction, so it should be tracked in a bug and fixed.
+  if ((Const32->getOpcode() == HexagonISD::CONST32) &&
+      !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
+    SDValue Base = Const32->getOperand(0);
+    if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+      EVT StoredVT = ST->getMemoryVT();
+      int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+      if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
+        MVT PointerTy = TLI.getPointerTy();
+        const GlobalValue* GV =
+          cast<GlobalAddressSDNode>(Base)->getGlobal();
+        SDValue TargAddr =
+          CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+        SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+                                                 dl, PointerTy,
+                                                 TargAddr);
+
+        // Figure out the base + offset opcode.
+        if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
+        else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+        else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
+        else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+        else assert (0 && "unknown memory type");
+
+        SDValue Ops[] = {SDValue(NewBase,0),
+                         CurDAG->getTargetConstant(Offset,PointerTy),
+                         Value, Chain};
+        // Build the indexed store.
+        SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+                                                MVT::Other, Ops, 4);
+        MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+        MemOp[0] = ST->getMemOperand();
+        cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+        ReplaceUses(ST, Result);
+        return Result;
+      }
+    }
+  }
+
+  return SelectCode(ST);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+  // Handle indexed stores.
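+  // AM is a post-indexed mode (e.g. ISD::POST_INC) when the store also
+  // produces an updated base register; plain stores fall through to the
+  // base+offset path below.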
+  if (AM != ISD::UNINDEXED) {
+    return SelectIndexedStore(ST, dl);
+  }
+
+  return SelectBaseOffsetStore(ST, dl);
+}
+
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+
+  //
+  // %conv.i = sext i32 %tmp1 to i64
+  // %conv2.i = sext i32 %add to i64
+  // %mul.i = mul nsw i64 %conv2.i, %conv.i
+  //
+  //   --- match with the following ---
+  //
+  // %mul.i = mpy (%tmp1, %add)
+  //
+
+  if (N->getValueType(0) == MVT::i64) {
+    // An i64 signed multiply of two sign-extended i32 values.
+    SDValue MulOp0 = N->getOperand(0);
+    SDValue MulOp1 = N->getOperand(1);
+
+    SDValue OP0;
+    SDValue OP1;
+
+    // Handle sign_extend and sextload.
+    if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+      SDValue Sext0 = MulOp0.getOperand(0);
+      if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+        return SelectCode(N);
+      }
+
+      OP0 = Sext0;
+    } else if (MulOp0.getOpcode() == ISD::LOAD) {
+      LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+      if (LD->getMemoryVT() != MVT::i32 ||
+          LD->getExtensionType() != ISD::SEXTLOAD ||
+          LD->getAddressingMode() != ISD::UNINDEXED) {
+        return SelectCode(N);
+      }
+
+      SDValue Base = LD->getBasePtr();
+      SDValue Chain = LD->getChain();
+      SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+      OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+                                            MVT::Other,
+                                            LD->getBasePtr(), TargetConst0,
+                                            Chain), 0);
+    } else {
+      return SelectCode(N);
+    }
+
+    // Same goes for the second operand.
+    if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+      SDValue Sext1 = MulOp1.getOperand(0);
+      if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+        return SelectCode(N);
+      }
+
+      OP1 = Sext1;
+    } else if (MulOp1.getOpcode() == ISD::LOAD) {
+      LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+      if (LD->getMemoryVT() != MVT::i32 ||
+          LD->getExtensionType() != ISD::SEXTLOAD ||
+          LD->getAddressingMode() != ISD::UNINDEXED) {
+        return SelectCode(N);
+      }
+
+      SDValue Base = LD->getBasePtr();
+      SDValue Chain = LD->getChain();
+      SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+      OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+                                            MVT::Other,
+                                            LD->getBasePtr(), TargetConst0,
+                                            Chain), 0);
+    } else {
+      return SelectCode(N);
+    }
+
+    // Generate a mpy instruction.
+    SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+                                            OP0, OP1);
+    ReplaceUses(N, Result);
+    return Result;
+  }
+
+  return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+      SDValue N000 = N00.getOperand(0);
+      SDValue N001 = N00.getOperand(1);
+      if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
+        SDValue N01 = N0.getOperand(1);
+        SDValue N02 = N0.getOperand(2);
+
+        // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+        // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
+        // IntRegs:i32:$src2)
+        // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+        // Pattern complexity = 9  cost = 1  size = 0.
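+        // i.e. src2 is compared sign-extended from 16 bits; when the
+        // select returns src1 on "less than", the tree computes a max,
+        // emitted below as SXTH of src2 feeding MAXw_rr with src1.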
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) { + SDValue N1 = N->getOperand(1); + if (N01 == N1) { + SDValue N2 = N->getOperand(2); + if (N000 == N2 && + N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && + N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + MVT::i32, N000); + SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl, + MVT::i32, + SDValue(SextNode, 0), + N1); + ReplaceUses(N, Result); + return Result; + } + } + } + + // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2, + // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1, + // IntRegs:i32:$src2) + // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2) + // Pattern complexity = 9 cost = 1 size = 0. + if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) { + SDValue N1 = N->getOperand(1); + if (N01 == N1) { + SDValue N2 = N->getOperand(2); + if (N000 == N2 && + N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 && + N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) { + SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl, + MVT::i32, N000); + SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl, + MVT::i32, + SDValue(SextNode, 0), + N1); + ReplaceUses(N, Result); + return Result; + } + } + } + } + } + } + + return SelectCode(N); +} + + +SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue Shift = N->getOperand(0); + + // + // %conv.i = sext i32 %tmp1 to i64 + // %conv2.i = sext i32 %add to i64 + // %mul.i = mul nsw i64 %conv2.i, %conv.i + // %shr5.i = lshr i64 %mul.i, 32 + // %conv3.i = trunc i64 %shr5.i to i32 + // + // --- match with the following --- + // + // %conv3.i = mpy (%tmp1, %add) + // + // Trunc to i32. + if (N->getValueType(0) == MVT::i32) { + // Trunc from i64. + if (Shift.getNode()->getValueType(0) == MVT::i64) { + // Trunc child is logical shift right. + if (Shift.getOpcode() != ISD::SRL) { + return SelectCode(N); + } + + SDValue ShiftOp0 = Shift.getOperand(0); + SDValue ShiftOp1 = Shift.getOperand(1); + + // Shift by const 32 + if (ShiftOp1.getOpcode() != ISD::Constant) { + return SelectCode(N); + } + + int32_t ShiftConst = + cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue(); + if (ShiftConst != 32) { + return SelectCode(N); + } + + // Shifting a i64 signed multiply + SDValue Mul = ShiftOp0; + if (Mul.getOpcode() != ISD::MUL) { + return SelectCode(N); + } + + SDValue MulOp0 = Mul.getOperand(0); + SDValue MulOp1 = Mul.getOperand(1); + + SDValue OP0; + SDValue OP1; + + // Handle sign_extend and sextload + if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { + SDValue Sext0 = MulOp0.getOperand(0); + if (Sext0.getNode()->getValueType(0) != MVT::i32) { + return SelectCode(N); + } + + OP0 = Sext0; + } else if (MulOp0.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode()); + if (LD->getMemoryVT() != MVT::i32 || + LD->getExtensionType() != ISD::SEXTLOAD || + LD->getAddressingMode() != ISD::UNINDEXED) { + return SelectCode(N); + } + + SDValue Base = LD->getBasePtr(); + SDValue Chain = LD->getChain(); + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32, + MVT::Other, + LD->getBasePtr(), + TargetConst0, Chain), 0); + } else { + return SelectCode(N); + } + + // Same goes for the second operand. 
+      if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+        SDValue Sext1 = MulOp1.getOperand(0);
+        if (Sext1.getNode()->getValueType(0) != MVT::i32)
+          return SelectCode(N);
+
+        OP1 = Sext1;
+      } else if (MulOp1.getOpcode() == ISD::LOAD) {
+        LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+        if (LD->getMemoryVT() != MVT::i32 ||
+            LD->getExtensionType() != ISD::SEXTLOAD ||
+            LD->getAddressingMode() != ISD::UNINDEXED) {
+          return SelectCode(N);
+        }
+
+        SDValue Base = LD->getBasePtr();
+        SDValue Chain = LD->getChain();
+        SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+        OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+                                              MVT::Other,
+                                              LD->getBasePtr(),
+                                              TargetConst0, Chain), 0);
+      } else {
+        return SelectCode(N);
+      }
+
+      // Generate a mpy instruction.
+      SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+                                              OP0, OP1);
+      ReplaceUses(N, Result);
+      return Result;
+    }
+  }
+
+  return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getValueType(0) == MVT::i32) {
+    SDValue Shl_0 = N->getOperand(0);
+    SDValue Shl_1 = N->getOperand(1);
+    // RHS is const.
+    if (Shl_1.getOpcode() == ISD::Constant) {
+      if (Shl_0.getOpcode() == ISD::MUL) {
+        SDValue Mul_0 = Shl_0.getOperand(0); // Val
+        SDValue Mul_1 = Shl_0.getOperand(1); // Const
+        // RHS of mul is const.
+        if (Mul_1.getOpcode() == ISD::Constant) {
+          int32_t ShlConst =
+            cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+          int32_t MulConst =
+            cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+          int32_t ValConst = MulConst << ShlConst;
+          SDValue Val = CurDAG->getTargetConstant(ValConst,
+                                                  MVT::i32);
+          if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+            if (isInt<9>(CN->getSExtValue())) {
+              SDNode* Result =
+                CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+                                       MVT::i32, Mul_0, Val);
+              ReplaceUses(N, Result);
+              return Result;
+            }
+
+        }
+      } else if (Shl_0.getOpcode() == ISD::SUB) {
+        SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+        SDValue Sub_1 = Shl_0.getOperand(1); // Val
+        if (Sub_0.getOpcode() == ISD::Constant) {
+          int32_t SubConst =
+            cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+          if (SubConst == 0) {
+            if (Sub_1.getOpcode() == ISD::SHL) {
+              SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+              SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+              if (Shl2_1.getOpcode() == ISD::Constant) {
+                int32_t ShlConst =
+                  cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+                int32_t Shl2Const =
+                  cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+                int32_t ValConst = 1 << (ShlConst+Shl2Const);
+                SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+                if (ConstantSDNode *CN =
+                    dyn_cast<ConstantSDNode>(Val.getNode()))
+                  if (isInt<9>(CN->getSExtValue())) {
+                    SDNode* Result =
+                      CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+                                             Shl2_0, Val);
+                    ReplaceUses(N, Result);
+                    return Result;
+                  }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return SelectCode(N);
+}
+
+
+//
+// If a zero_extend follows an intrinsic in the DAG (meaning that the result
+// of the intrinsic is a predicate), convert the zero_extend into a transfer
+// instruction.
+//
+// The zero_extend -> transfer lowering is done here. Otherwise, the
+// zero_extend would be converted into a MUX, because predicate registers
+// are defined as 1 bit in the compiler while the architecture defines them
+// as 8-bit registers. We want to preserve all of the lower 8 bits, not just
+// the LSB.
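+//
+// For example, (i32 (zero_extend Pd)) where Pd is the result of one of the
+// predicate-producing intrinsics above is selected as a single TFR_RsPd;
+// the i64 form additionally pairs the transfer with a zero high word using
+// COMBINE_rr.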
+// +SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDNode *IsIntrinsic = N->getOperand(0).getNode(); + if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) { + unsigned ID = + cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue(); + if (doesIntrinsicReturnPredicate(ID)) { + // Now we need to differentiate target data types. + if (N->getValueType(0) == MVT::i64) { + // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs). + SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32); + SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl, + MVT::i32, + SDValue(IsIntrinsic, 0)); + SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, + MVT::i32, + TargetConst0); + SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl, + MVT::i64, MVT::Other, + SDValue(Result_2, 0), + SDValue(Result_1, 0)); + ReplaceUses(N, Result_3); + return Result_3; + } + if (N->getValueType(0) == MVT::i32) { + // Convert the zero_extend to Rs = Pd + SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl, + MVT::i32, + SDValue(IsIntrinsic, 0)); + ReplaceUses(N, RsPd); + return RsPd; + } + assert(0 && "Unexpected value type"); + } + } + return SelectCode(N); +} + + +// +// Checking for intrinsics which have predicate registers as operand(s) +// and lowering to the actual intrinsic. +// +SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID); + + // We are concerned with only those intrinsics that have predicate registers + // as at least one of the operands. + if (IntrinsicWithPred) { + SmallVector<SDValue, 8> Ops; + const MCInstrDesc &MCID = TII->get(IntrinsicWithPred); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + // Iterate over all the operands of the intrinsics. + // For PredRegs, do the transfer. + // For Double/Int Regs, just preserve the value + // For immediates, lower it. + for (unsigned i = 1; i < N->getNumOperands(); ++i) { + SDNode *Arg = N->getOperand(i).getNode(); + const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); + + if (RC == Hexagon::IntRegsRegisterClass || + RC == Hexagon::DoubleRegsRegisterClass) { + Ops.push_back(SDValue(Arg, 0)); + } else if (RC == Hexagon::PredRegsRegisterClass) { + // Do the transfer. + SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1, + SDValue(Arg, 0)); + Ops.push_back(SDValue(PdRs,0)); + } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) { + // This is immediate operand. Lower it here making sure that we DO have + // const SDNode for immediate value. + int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue(); + SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32); + Ops.push_back(SDVal); + } else { + assert(0 && "Unimplemented"); + } + } + EVT ReturnValueVT = N->getValueType(0); + SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl, + ReturnValueVT, + Ops.data(), Ops.size()); + ReplaceUses(N, Result); + return Result; + } + return SelectCode(N); +} + + +// +// Map predicate true (encoded as -1 in LLVM) to a XOR. 
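+// The sequence built below is Rs = #0; Pd = Rs; Pd = xor(Pd, not(Pd)),
+// which leaves the predicate register with all bits set.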
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getValueType(0) == MVT::i1) {
+    SDNode* Result;
+    int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+    if (Val == -1) {
+      unsigned NewIntReg = TM.getInstrInfo()->createVR(MF, MVT(MVT::i32));
+      SDValue Reg = CurDAG->getRegister(NewIntReg, MVT::i32);
+
+      // Create the IntReg = #0 node.
+      SDNode* IntRegTFR =
+        CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+                               CurDAG->getTargetConstant(0, MVT::i32));
+
+      // Pd = IntReg
+      SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+                                          SDValue(IntRegTFR, 0));
+
+      // not(Pd)
+      SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_pp, dl, MVT::i1,
+                                             SDValue(Pd, 0));
+
+      // xor(Pd, not(Pd))
+      Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+                                      SDValue(Pd, 0), SDValue(NotPd, 0));
+
+      // We have just built:
+      // Rs = #0
+      // Pd = Rs
+      // Pd = xor(Pd, not(Pd))
+
+      ReplaceUses(N, Result);
+      return Result;
+    }
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Map an add followed by an asr -> asr +=.
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getValueType(0) != MVT::i32) {
+    return SelectCode(N);
+  }
+  // Identify nodes of the form: add(asr(...)).
+  SDNode* Src1 = N->getOperand(0).getNode();
+  if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+      || Src1->getValueType(0) != MVT::i32) {
+    return SelectCode(N);
+  }
+
+  // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+  // Rd and Rd' are assigned to the same register.
+  SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32,
+                                          N->getOperand(1),
+                                          Src1->getOperand(0),
+                                          Src1->getOperand(1));
+  ReplaceUses(N, Result);
+
+  return Result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+  if (N->isMachineOpcode())
+    return NULL;   // Already selected.
+
+  switch (N->getOpcode()) {
+    case ISD::Constant:
+      return SelectConstant(N);
+
+    case ISD::ADD:
+      return SelectAdd(N);
+
+    case ISD::SHL:
+      return SelectSHL(N);
+
+    case ISD::LOAD:
+      return SelectLoad(N);
+
+    case ISD::STORE:
+      return SelectStore(N);
+
+    case ISD::SELECT:
+      return SelectSelect(N);
+
+    case ISD::TRUNCATE:
+      return SelectTruncate(N);
+
+    case ISD::MUL:
+      return SelectMul(N);
+
+    case ISD::ZERO_EXTEND:
+      return SelectZeroExtend(N);
+
+    case ISD::INTRINSIC_WO_CHAIN:
+      return SelectIntrinsicWOChain(N);
+  }
+
+  return SelectCode(N);
+}
+
+
+//
+// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
+// to define these instructions.
+//
+bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
+                                       SDValue &Offset) {
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+ + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_0_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_0_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_1_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_1_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_2_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsS11_2_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_0_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_0_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. + + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_1_Offset(Offset.getNode())); + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return (IsU6_1_Offset(Offset.getNode())); +} + + +bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; // Direct calls. 
+
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return (IsU6_2_Offset(Offset.getNode()));
+  }
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return (IsU6_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
+                                           SDValue &Offset) {
+  return SelectADDRriS11_2(Addr, Base, Offset);
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
+                                            SDValue &Offset) {
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return (IsS11_3_Offset(Offset.getNode()));
+  }
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return (IsS11_3_Offset(Offset.getNode()));
+}
+
+bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
+                                       SDValue &R2) {
+  if (Addr.getOpcode() == ISD::FrameIndex) return false;
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (Addr.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+      if (isInt<13>(CN->getSExtValue()))
+        return false;  // Let the reg+imm pattern catch this!
+    R1 = Addr.getOperand(0);
+    R2 = Addr.getOperand(1);
+    return true;
+  }
+
+  R1 = Addr;
+
+  return true;
+}
+
+
+// Handle generic address case. It is accessed from inlined asm =m constraints,
+// which could have any kind of pointer.
+bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
+                                     SDValue &Base, SDValue &Offset) {
+  if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+      Addr.getOpcode() == ISD::TargetGlobalAddress)
+    return false;  // Direct calls.
+
+  if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Offset = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+
+  if (Addr.getOpcode() == ISD::ADD) {
+    Base = Addr.getOperand(0);
+    Offset = Addr.getOperand(1);
+    return true;
+  }
+
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, MVT::i32);
+  return true;
+}
+
+
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+                             std::vector<SDValue> &OutOps) {
+  SDValue Op0, Op1;
+
+  switch (ConstraintCode) {
+    case 'o':   // Offsetable.
+    case 'v':   // Not offsetable.
+    default: return true;
+    case 'm':   // Memory.
+      if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+        return true;
+      break;
+  }
+
+  OutOps.push_back(Op0);
+  OutOps.push_back(Op1);
+  return false;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 0000000..0ac3cf0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,1505 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+const unsigned Hexagon_MAX_RET_SIZE = 64;
+using namespace llvm;
+
+static cl::opt<bool>
+EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+               cl::desc("Control jump table emission on Hexagon target"));
+
+int NumNamedVarArgParams = -1;
+
+// Implement calling convention for Hexagon.
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+           MVT LocVT, CCValAssign::LocInfo LocInfo,
+           ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+             MVT LocVT, CCValAssign::LocInfo LocInfo,
+             ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+             MVT LocVT, CCValAssign::LocInfo LocInfo,
+             ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+              MVT LocVT, CCValAssign::LocInfo LocInfo,
+              ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+                MVT LocVT, CCValAssign::LocInfo LocInfo,
+                ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+                MVT LocVT, CCValAssign::LocInfo LocInfo,
+                ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon_VarArg(unsigned ValNo, MVT ValVT,
+                  MVT LocVT, CCValAssign::LocInfo LocInfo,
+                  ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  // NumNamedVarArgParams cannot be zero for a vararg function.
+  assert((NumNamedVarArgParams > 0) &&
+         "NumNamedVarArgParams must be bigger than zero.");
+
+  if ((int)ValNo < NumNamedVarArgParams) {
+    // Deal with named arguments.
+    return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+  }
+
+  // Deal with unnamed arguments.
+  unsigned ofst;
+  if (ArgFlags.isByVal()) {
+    // If pass-by-value, the size allocated on stack is decided
+    // by ArgFlags.getByValSize(), not by the size of LocVT.
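+    // For example, a 16-byte by-value struct passed to a varargs callee
+    // reserves 16 bytes of argument stack (4-byte aligned) here, not the
+    // 4 or 8 bytes implied by LocVT.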
+ assert ((ArgFlags.getByValSize() > 8) && + "ByValSize must be bigger than 8 bytes"); + ofst = State.AllocateStack(ArgFlags.getByValSize(), 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i32) { + ofst = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + if (LocVT == MVT::i64) { + ofst = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); + return false; + } + llvm_unreachable(0); + + return true; +} + + +static bool +CC_Hexagon (unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (ArgFlags.isByVal()) { + // Passed on stack. + assert ((ArgFlags.getByValSize() > 8) && + "ByValSize must be bigger than 8 bytes"); + unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } + + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (LocVT == MVT::i32) { + if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64) { + if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + return true; // CC didn't match. +} + + +static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + static const unsigned RegList[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5 + }; + if (unsigned Reg = State.AllocateReg(RegList, 6)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + static const unsigned RegList1[] = { + Hexagon::D1, Hexagon::D2 + }; + static const unsigned RegList2[] = { + Hexagon::R1, Hexagon::R3 + }; + if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + + unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + + if (LocVT == MVT::i1 || + LocVT == MVT::i8 || + LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (LocVT == MVT::i32) { + if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } + + if (LocVT == MVT::i64) { + if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) + return false; + } 
+ + return true; // CC didn't match. +} + +static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + + if (LocVT == MVT::i32) { + if (unsigned Reg = State.AllocateReg(Hexagon::R0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(4, 4); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + if (LocVT == MVT::i64) { + if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + + unsigned Offset = State.AllocateStack(8, 8); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; +} + +SDValue +HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) +const { + return SDValue(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. Sometimes what we are copying is the end of a +/// larger object, the part that does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(), MachinePointerInfo()); +} + + +// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is +// passed by value, the function prototype is modified to return void and +// the value is stored in memory pointed by a pointer passed by caller. +SDValue +HexagonTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze return values of ISD::RET + CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); + + SDValue StackPtr = DAG.getRegister(TM.getRegisterInfo()->getStackRegister(), + MVT::i32); + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + SDValue Ret = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together with flags. 
+ Flag = Chain.getValue(1); + } + + if (Flag.getNode()) + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain); +} + + + + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. Returns a SDNode with the same number of values as the +/// ISD::CALL. +SDValue +HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const + SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const { + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, + RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +/// LowerCall - Functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. +SDValue +HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + // Check for varargs. + NumNamedVarArgParams = -1; + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) + { + const Function* CalleeFn = NULL; + Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32); + if ((CalleeFn = dyn_cast<Function>(GA->getGlobal()))) + { + // If a function has zero args and is a vararg function, that's + // disallowed so it must be an undeclared function. Do not assume + // varargs if the callee is undefined. + if (CalleeFn->isVarArg() && + CalleeFn->getFunctionType()->getNumParams() != 0) { + NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams(); + } + } + } + + if (NumNamedVarArgParams > 0) + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); + else + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); + + + if(isTailCall) { + bool StructAttrFlag = + DAG.getMachineFunction().getFunction()->hasStructRetAttr(); + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, + StructAttrFlag, + Outs, OutVals, Ins, DAG); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){ + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + isTailCall = false; + break; + } + } + if (isTailCall) { + DEBUG(dbgs () << "Eligible for Tail Call\n"); + } else { + DEBUG(dbgs () << + "Argument must be passed on stack. 
Not eligible for Tail Call\n");
+    }
+  }
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+  SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  SDValue StackPtr =
+    DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
+                       getPointerTy());
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = OutVals[i];
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+      default:
+        // Loc info must be one of Full, SExt, ZExt, or AExt.
+        assert(0 && "Unknown loc info!");
+      case CCValAssign::Full:
+        break;
+      case CCValAssign::SExt:
+        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+      case CCValAssign::ZExt:
+        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+      case CCValAssign::AExt:
+        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+    }
+
+    if (VA.isMemLoc()) {
+      unsigned LocMemOffset = VA.getLocMemOffset();
+      SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
+      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+
+      if (Flags.isByVal()) {
+        // The argument is a struct passed by value. According to LLVM, "Arg"
+        // is a pointer.
+        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+                                                        Flags, DAG, dl));
+      } else {
+        // The argument is not passed by value. "Arg" is a built-in type. It
+        // is not a pointer.
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                           MachinePointerInfo(), false, false,
+                                           0));
+      }
+      continue;
+    }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    }
+  }
+
+  // Transform all store nodes into one single node because all store
+  // nodes are independent of each other.
+  if (!MemOpChains.empty()) {
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+                        MemOpChains.size());
+  }
+
+  if (!isTailCall)
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+                                                        getPointerTy(), true));
+
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+  if (!isTailCall) {
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                               RegsToPass[i].second, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+  }
+
+  // For tail calls lower the arguments to the 'real' stack slot.
+  if (isTailCall) {
+    // Force all the incoming stack arguments to be loaded from the stack
+    // before any new outgoing arguments are stored to the stack, because the
+    // outgoing stack slots may alias the incoming argument stack slots, and
+    // the alias isn't otherwise explicit. This is slightly more conservative
+    // than necessary, because it means that each store effectively depends
+    // on every argument instead of just those arguments it would clobber.
+    //
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+  // For tail calls, lower the arguments to the 'real' stack slots.
+  if (isTailCall) {
+    // Force all the incoming stack arguments to be loaded from the stack
+    // before any new outgoing arguments are stored to the stack, because the
+    // outgoing stack slots may alias the incoming argument stack slots, and
+    // the alias isn't otherwise explicit. This is slightly more conservative
+    // than necessary, because it means that each store effectively depends
+    // on every argument instead of just those arguments it would clobber.
+    //
+    // Do not glue the preceding copy-to-reg nodes together with the
+    // following ones.
+    InFlag = SDValue();
+    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                               RegsToPass[i].second, InFlag);
+      InFlag = Chain.getValue(1);
+    }
+    InFlag = SDValue();
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (flag_aligned_memcpy) {
+    const char *MemcpyName =
+      "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+    Callee = DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+    flag_aligned_memcpy = false;
+  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+  } else if (ExternalSymbolSDNode *S =
+             dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+  }
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+  }
+
+  if (InFlag.getNode()) {
+    Ops.push_back(InFlag);
+  }
+
+  if (isTailCall)
+    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+  Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(0, true), InFlag);
+  InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                         InVals, OutVals, Callee);
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+                                   bool isSEXTLoad, SDValue &Base,
+                                   SDValue &Offset, bool &isInc,
+                                   SelectionDAG &DAG) {
+  if (Ptr->getOpcode() != ISD::ADD)
+    return false;
+
+  if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+    isInc = (Ptr->getOpcode() == ISD::ADD);
+    Base = Ptr->getOperand(0);
+    Offset = Ptr->getOperand(1);
+    // Ensure that Offset is a constant.
+    return (isa<ConstantSDNode>(Offset));
+  }
+
+  return false;
+}
+
+// TODO: Put this function along with the other isS* functions in
+// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
+// functions defined in HexagonImmediates.td.
+static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // immS4 predicate - True if the immediate fits in a 4-bit sign-extended
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  int64_t m = 0;
+  if (ShiftAmount > 0) {
+    m = v % ShiftAmount;
+    v = v >> ShiftAmount;
+  }
+  return (v <= 7) && (v >= -8) && (m == 0);
+}
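+// [Editorial sketch, not part of the original patch] The function above
+// reduces the offset modulo ShiftAmount itself; a scaled 4-bit signed field
+// is more commonly checked as "a multiple of 2^Shift whose scaled value fits
+// in [-8, 7]". A hypothetical helper with those semantics, assuming that is
+// the intended constraint (isInt<> comes from llvm/Support/MathExtras.h):
+#if 0
+static bool fitsScaledS4(int64_t V, unsigned Shift) {
+  if (V & ((1LL << Shift) - 1))   // must be a multiple of the access size
+    return false;
+  return isInt<4>(V >> Shift);    // scaled value must fit in s4
+}
+#endif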
+/// getPostIndexedAddressParts - Returns true, and sets the base pointer, the
+/// offset, and the addressing mode by reference, if this node can be combined
+/// with a load/store to form a post-indexed load/store.
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                                       SDValue &Base,
+                                                       SDValue &Offset,
+                                                       ISD::MemIndexedMode &AM,
+                                                       SelectionDAG &DAG) const
+{
+  EVT VT;
+  SDValue Ptr;
+  bool isSEXTLoad = false;
+
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    VT = LD->getMemoryVT();
+    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    VT = ST->getMemoryVT();
+    if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+      return false;
+    }
+  } else {
+    return false;
+  }
+
+  bool isInc;
+  bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+                                        isInc, DAG);
+  // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
+  int ShiftAmount = VT.getSizeInBits() / 16;
+  if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
+    AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+    return true;
+  }
+
+  return false;
+}
+
+SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDNode *Node = Op.getNode();
+  MachineFunction &MF = DAG.getMachineFunction();
+  HexagonMachineFunctionInfo *FuncInfo =
+    MF.getInfo<HexagonMachineFunctionInfo>();
+  switch (Node->getOpcode()) {
+    case ISD::INLINEASM: {
+      unsigned NumOps = Node->getNumOperands();
+      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+        --NumOps;  // Ignore the flag operand.
+
+      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+        if (FuncInfo->hasClobberLR())
+          break;
+        unsigned Flags =
+          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+        ++i;  // Skip the ID value.
+
+        switch (InlineAsm::getKind(Flags)) {
+        default: llvm_unreachable("Bad flags!");
+          case InlineAsm::Kind_RegDef:
+          case InlineAsm::Kind_RegUse:
+          case InlineAsm::Kind_Imm:
+          case InlineAsm::Kind_Clobber:
+          case InlineAsm::Kind_Mem: {
+            for (; NumVals; --NumVals, ++i) {}
+            break;
+          }
+          case InlineAsm::Kind_RegDefEarlyClobber: {
+            for (; NumVals; --NumVals, ++i) {
+              unsigned Reg =
+                cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+
+              // Check whether the register is LR.
+              if (Reg == TM.getRegisterInfo()->getRARegister()) {
+                FuncInfo->setHasClobberLR(true);
+                break;
+              }
+            }
+            break;
+          }
+        }
+      }
+    }
+  }  // Node->getOpcode
+  return Op;
+}
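+// [Editorial sketch, not part of the original patch] A condensed, hypothetical
+// restatement of the operand walk above: each group in an INLINEASM node is a
+// flag word followed by NumVals operands, and only early-clobber register
+// definitions are matched against the return-address register.
+#if 0
+static bool asmClobbersLR(const SDNode *Node, unsigned RAReg) {
+  unsigned NumOps = Node->getNumOperands();
+  if (Node->getOperand(NumOps - 1).getValueType() == MVT::Glue)
+    --NumOps;
+  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+    unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+    ++i;  // Skip the flag word.
+    if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegDefEarlyClobber)
+      for (unsigned v = 0; v != NumVals; ++v)
+        if (cast<RegisterSDNode>(Node->getOperand(i + v))->getReg() == RAReg)
+          return true;
+    i += NumVals;
+  }
+  return false;
+}
+#endif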
+
+//
+// Taken from the XCore backend.
+//
+SDValue HexagonTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue Table = Op.getOperand(1);
+  SDValue Index = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+  unsigned JTI = JT->getIndex();
+  MachineFunction &MF = DAG.getMachineFunction();
+  const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+  SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+  // Mark all jump-table targets as address taken.
+  const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
+  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+    MachineBasicBlock *MBB = JTBBs[i];
+    MBB->setHasAddressTaken();
+    // This line is needed to set the hasAddressTaken flag on the BasicBlock
+    // object as well.
+    BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
+  }
+
+  SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+                                      getPointerTy(), TargetJT);
+  SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+                                   DAG.getConstant(2, MVT::i32));
+  SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
+                                  ShiftIndex);
+  SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
+                                   MachinePointerInfo(), false, false, false,
+                                   0);
+  return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
+}
+
+
+SDValue
+HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+
+  unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+  // Get a reference to the stack pointer.
+  SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+
+  // Subtract the dynamic size from the actual stack size to
+  // obtain the new stack size.
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+  //
+  // For Hexagon, the outgoing memory arguments area should be on top of the
+  // alloca area on the stack, i.e., the outgoing memory arguments should be
+  // at a lower address than the alloca area. Move the alloca area down the
+  // stack by adding back the space reserved for outgoing arguments to SP
+  // here.
+  //
+  // We do not know what the size of the outgoing args is at this point.
+  // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
+  // stack pointer. We patch this instruction with the correct, known
+  // offset in emitPrologue().
+  //
+  // Use a placeholder immediate (zero) for now. This will be patched up
+  // by emitPrologue().
+  SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+                                  MVT::i32,
+                                  Sub,
+                                  DAG.getConstant(0, MVT::i32));
+
+  // The Sub result contains the new stack start address, so it
+  // must be placed in the stack pointer register.
+  SDValue CopyChain =
+    DAG.getCopyToReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(), Sub);
+
+  SDValue Ops[2] = { ArgAdjust, CopyChain };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
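+// [Editorial note, not part of the original patch] The intent of the
+// adjustment above, assuming a downward-growing stack: the pointer handed
+// back to the program is not the new SP itself but Sub plus the (later
+// patched) outgoing-argument size, so dynamically allocated memory sits
+// above the outgoing-argument area.
+//
+//   ... locals ...              <- old SP
+//   alloca'd memory             <- ADJDYNALLOC result (Sub + patched offset)
+//   outgoing call arguments
+//   ...                         <- new SP (Sub)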
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+                                            CallingConv::ID CallConv,
+                                            bool isVarArg,
+                                            const
+                                            SmallVectorImpl<ISD::InputArg> &Ins,
+                                            DebugLoc dl, SelectionDAG &DAG,
+                                            SmallVectorImpl<SDValue> &InVals)
+const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  HexagonMachineFunctionInfo *FuncInfo =
+    MF.getInfo<HexagonMachineFunctionInfo>();
+
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+
+  CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+  // When a struct is returned by value, LLVM passes a hidden first argument
+  // that points at the caller's stack slot for the return value. On Hexagon,
+  // that address is passed only when the struct is larger than 8 bytes;
+  // structs of 8 bytes or less are returned directly through R0/R1, and no
+  // address is passed to the callee.
+
+  SmallVector<SDValue, 4> MemOps;
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    ISD::ArgFlagsTy Flags = Ins[i].Flags;
+    unsigned ObjSize;
+    unsigned StackLocation;
+    int FI;
+
+    if (   (VA.isRegLoc() && !Flags.isByVal())
+        || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+      // Arguments passed in registers:
+      // 1. int, long long, and ptr args that get allocated in a register.
+      // 2. Large structs that get a register to hold their address.
+      EVT RegVT = VA.getLocVT();
+      if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (RegVT == MVT::i64) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else {
+        assert(0);
+      }
+    } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+      assert(0 && "ByValSize must be bigger than 8 bytes");
+    } else {
+      // Sanity check.
+      assert(VA.isMemLoc());
+
+      if (Flags.isByVal()) {
+        // If it's a byval parameter, then we need to compute the
+        // "real" size, not the size of the pointer.
+        ObjSize = Flags.getByValSize();
+      } else {
+        ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+      }
+
+      StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+      // Create the frame index object for this incoming parameter...
+      FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+      if (Flags.isByVal()) {
+        // If it's a pass-by-value aggregate, then do not dereference the stack
+        // location. Instead, we should generate a reference to the stack
+        // location.
+        InVals.push_back(FIN);
+      } else {
+        InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+                                     MachinePointerInfo(), false, false,
+                                     false, 0));
+      }
+    }
+  }
+
+  if (!MemOps.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
+                        MemOps.size());
+
+  if (isVarArg) {
+    // This will point to the next argument passed via stack.
+    int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+                                            HEXAGON_LRFP_SIZE +
+                                            CCInfo.getNextStackOffset(),
+                                            true);
+    FuncInfo->setVarArgsFrameIndex(FrameIndex);
+  }
+
+  return Chain;
+}
+
+SDValue
+HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+  // VASTART stores the address of the VarArgsFrameIndex slot into the
+  // memory location argument.
+ MachineFunction &MF = DAG.getMachineFunction(); + HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>(); + SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), Addr, + Op.getOperand(1), MachinePointerInfo(SV), false, + false, 0); +} + +SDValue +HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + SDNode* OpNode = Op.getNode(); + + SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1, + Op.getOperand(2), Op.getOperand(3), + Op.getOperand(4)); + return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0), + Cond, Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue +HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32)); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + +SDValue +HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + const HexagonRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + TRI->getFrameRegister(), VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), + false, false, false, 0); + return FrameAddr; +} + + +SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op, + SelectionDAG& DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + +SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG& DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0)); +} + + +SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, + SelectionDAG &DAG) const { + SDValue Result; + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); + DebugLoc dl = Op.getDebugLoc(); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); + + HexagonTargetObjectFile &TLOF = + (HexagonTargetObjectFile&)getObjFileLowering(); + if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { + return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); + } + + return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); +} + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation 
+//===----------------------------------------------------------------------===//
+
+HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
+                                             &targetmachine)
+  : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+    TM(targetmachine) {
+
+  // Set up the register classes.
+  addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass);
+  addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass);
+
+  addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass);
+
+  computeRegisterProperties();
+
+  // Align loop entry.
+  setPrefLoopAlignment(4);
+
+  // Limits for inline expansion of memcpy/memmove.
+  maxStoresPerMemcpy = 6;
+  maxStoresPerMemmove = 6;
+
+  //
+  // Library calls for unsupported operations.
+  //
+  setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+  setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+  setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+  setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+  setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+  setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+  setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+  setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+  setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+  setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+  setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+
+  setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+  setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+  setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+  setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+  setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+  setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+  setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+  setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+  setOperationAction(ISD::SDIV, MVT::i64, Expand);
+  setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+  setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+  setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+  setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+  setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+  setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+  setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+  setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+  setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+  setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+  setOperationAction(ISD::FADD, MVT::f32, Expand);
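+  // [Editorial sketch, not part of the original patch] The registrations in
+  // this constructor all follow one pattern: name the runtime routine, then
+  // mark the node Expand so legalization emits a call to it. A hypothetical
+  // table-driven equivalent for two of the entries above (FPLibcalls is an
+  // invented name; array_lengthof is from llvm/ADT/STLExtras.h):
+#if 0
+  static const struct { RTLIB::Libcall LC; const char *Name; } FPLibcalls[] = {
+    { RTLIB::ADD_F64, "__hexagon_adddf3" },
+    { RTLIB::ADD_F32, "__hexagon_addsf3" }
+  };
+  for (unsigned i = 0, e = array_lengthof(FPLibcalls); i != e; ++i)
+    setLibcallName(FPLibcalls[i].LC, FPLibcalls[i].Name);
+  setOperationAction(ISD::FADD, MVT::f64, Expand);
+  setOperationAction(ISD::FADD, MVT::f32, Expand);
+#endif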
+  setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+  setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+  setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+  setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+  setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+  setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+  setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+  setOperationAction(ISD::FMUL, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+
+  setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+  setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+  setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+  setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+  setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+  setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+  setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+  setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+  setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+  setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+  setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+  setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+  setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+  setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+  setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+  setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
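+  // [Editorial note, not part of the original patch] With the POST_INC
+  // actions above legal, pointer-walking loops can select Hexagon's
+  // post-increment forms, e.g. "r2 = memw(r0++#4)". Illustrative source only:
+#if 0
+  int sum_words(const int *p, int n) {
+    int s = 0;
+    while (n--)
+      s += *p++;   // candidate for a post-increment memw
+    return s;
+  }
+#endif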
+  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+  // Turn FP extload into load/fextend.
+  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  // Hexagon has no i1 sign-extending load; expand it.
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+  // Turn FP truncstore into trunc + store.
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+  // Custom legalize GlobalAddress nodes into CONST32.
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+  // Expand i64 truncation.
+  setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+  // Hexagon doesn't have sext_inreg; replace it with shl/sra.
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  // Hexagon has no REM or DIVREM operations.
+  setOperationAction(ISD::UREM, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i32, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+  setOperationAction(ISD::SREM, MVT::i64, Expand);
+  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+  // Expand fp<->uint.
+  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+  // Hexagon has no floating-point select: expand to SELECT_CC.
+  setOperationAction(ISD::SELECT, MVT::f32, Expand);
+  setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+  // Lower SELECT_CC to SETCC and SELECT.
+  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+  // This is a workaround documented in DAGCombiner.cpp:2892; we don't
+  // support SELECT_CC on every type.
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  if (EmitJumpTables) {
+    setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+  } else {
+    setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  }
+
+  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+
+  setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  setOperationAction(ISD::FSIN , MVT::f64, Expand);
+  setOperationAction(ISD::FCOS , MVT::f64, Expand);
+  setOperationAction(ISD::FREM , MVT::f64, Expand);
+  setOperationAction(ISD::FSIN , MVT::f32, Expand);
+  setOperationAction(ISD::FCOS , MVT::f32, Expand);
+  setOperationAction(ISD::FREM , MVT::f32, Expand);
+  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::ROTL , MVT::i32, Expand);
+  setOperationAction(ISD::ROTR , MVT::i32, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+  setOperationAction(ISD::FPOW , MVT::f64, Expand);
+  setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+  setOperationAction(ISD::EHSELECTION,
MVT::i32, Expand); + + setOperationAction(ISD::EH_RETURN, MVT::Other, Expand); + + if (TM.getSubtargetImpl()->isSubtargetV2()) { + setExceptionPointerRegister(Hexagon::R20); + setExceptionSelectorRegister(Hexagon::R21); + } else { + setExceptionPointerRegister(Hexagon::R0); + setExceptionSelectorRegister(Hexagon::R1); + } + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex. + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + + + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + setOperationAction(ISD::INLINEASM , MVT::Other, Custom); + + setMinFunctionAlignment(2); + + // Needed for DYNAMIC_STACKALLOC expansion. + unsigned StackRegister = TM.getRegisterInfo()->getStackRegister(); + setStackPointerRegisterToSaveRestore(StackRegister); +} + + +const char* +HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; + case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; + case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; + case HexagonISD::BRICC: return "HexagonISD::BRICC"; + case HexagonISD::BRFCC: return "HexagonISD::BRFCC"; + case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC"; + case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC"; + case HexagonISD::Hi: return "HexagonISD::Hi"; + case HexagonISD::Lo: return "HexagonISD::Lo"; + case HexagonISD::FTOI: return "HexagonISD::FTOI"; + case HexagonISD::ITOF: return "HexagonISD::ITOF"; + case HexagonISD::CALL: return "HexagonISD::CALL"; + case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; + case HexagonISD::BR_JT: return "HexagonISD::BR_JT"; + case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; + } +} + +bool +HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + EVT MTy1 = EVT::getEVT(Ty1); + EVT MTy2 = EVT::getEVT(Ty2); + if (!MTy1.isSimple() || !MTy2.isSimple()) { + return false; + } + return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32)); +} + +bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isSimple() || !VT2.isSimple()) { + return false; + } + return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32)); +} + +SDValue +HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: assert(0 && "Should not custom lower this!"); + // Frame & Return address. Currently unimplemented. 
+  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
+  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    assert(0 && "TLS not implemented for Hexagon.");
+  case ISD::MEMBARRIER:         return LowerMEMBARRIER(Op, DAG);
+  case ISD::ATOMIC_FENCE:       return LowerATOMIC_FENCE(Op, DAG);
+  case ISD::GlobalAddress:      return LowerGLOBALADDRESS(Op, DAG);
+  case ISD::VASTART:            return LowerVASTART(Op, DAG);
+  case ISD::BR_JT:              return LowerBR_JT(Op, DAG);
+
+  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INLINEASM:          return LowerINLINEASM(Op, DAG);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                   MachineBasicBlock *BB)
+const {
+  switch (MI->getOpcode()) {
+    case Hexagon::ADJDYNALLOC: {
+      MachineFunction *MF = BB->getParent();
+      HexagonMachineFunctionInfo *FuncInfo =
+        MF->getInfo<HexagonMachineFunctionInfo>();
+      FuncInfo->addAllocaAdjustInst(MI);
+      return BB;
+    }
+    default:
+      assert(false && "Unexpected instr type to insert");
+  }  // switch
+  return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+HexagonTargetLowering::getRegForInlineAsmConstraint(const
+                                                    std::string &Constraint,
+                                                    EVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':   // R0-R31
+      switch (VT.getSimpleVT().SimpleTy) {
+      default:
+        assert(0 && "getRegForInlineAsmConstraint Unhandled data type");
+      case MVT::i32:
+      case MVT::i16:
+      case MVT::i8:
+        return std::make_pair(0U, Hexagon::IntRegsRegisterClass);
+      case MVT::i64:
+        return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass);
+      }
+    default:
+      assert(0 && "Unknown asm register class");
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented by
+/// AM is legal for this target, for a load/store of the specified type.
+bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                                  Type *Ty) const {
+  // Allow only base offsets strictly inside (-(1 << 13), (1 << 13) - 1).
+  if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+    return false;
+  }
+
+  // No global is ever allowed as a base.
+  if (AM.BaseGV) {
+    return false;
+  }
+
+  int Scale = AM.Scale;
+  if (Scale < 0) Scale = -Scale;
+  switch (Scale) {
+  case 0:  // No scale reg, "r+i", "r", or just "i".
+    break;
+  default: // No scaled addressing mode.
+    return false;
+  }
+  return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is a legal
+/// icmp immediate, that is, the target has compare instructions which can
+/// compare a register against the immediate without having to materialize
+/// the immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  return Imm >= -512 && Imm <= 511;
+}
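+// [Editorial note, not part of the original patch] The [-512, 511] window
+// above is a signed 10-bit field. For orientation:
+//
+//   icmp sgt i32 %x, 511   ; immediate in range, a single compare instruction
+//   icmp sgt i32 %x, 512   ; 512 must first be materialized into a register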
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+                                 SDValue Callee,
+                                 CallingConv::ID CalleeCC,
+                                 bool isVarArg,
+                                 bool isCalleeStructRet,
+                                 bool isCallerStructRet,
+                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                 const SmallVectorImpl<SDValue> &OutVals,
+                                 const SmallVectorImpl<ISD::InputArg> &Ins,
+                                 SelectionDAG& DAG) const {
+  const Function *CallerF = DAG.getMachineFunction().getFunction();
+  CallingConv::ID CallerCC = CallerF->getCallingConv();
+  bool CCMatch = CallerCC == CalleeCC;
+
+  // ***************************************************************************
+  //  Look for obvious safe cases to perform tail call optimization that do not
+  //  require ABI changes.
+  // ***************************************************************************
+
+  // If this is a tail call via a function pointer, then don't do it!
+  if (!(dyn_cast<GlobalAddressSDNode>(Callee))
+      && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+    return false;
+  }
+
+  // Do not optimize if the calling conventions do not match.
+  if (!CCMatch)
+    return false;
+
+  // Do not tail call optimize vararg calls.
+  if (isVarArg)
+    return false;
+
+  // Also avoid tail call optimization if either caller or callee uses struct
+  // return semantics.
+  if (isCalleeStructRet || isCallerStructRet)
+    return false;
+
+  // In addition to the cases above, we also disable tail call optimization if
+  // the calling convention requires that at least one outgoing argument be
+  // passed on the stack. We cannot check that here because at this point the
+  // information is not available.
+  return true;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 0000000..b327615
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,162 @@
+//==-- HexagonISelLowering.h - Hexagon DAG Lowering Interface ----*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_ISELLOWERING_H
+#define Hexagon_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "Hexagon.h"
+
+namespace llvm {
+  namespace HexagonISD {
+    enum {
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      CONST32,
+      CONST32_GP,  // For marking data present in GP.
+      SETCC,
+      ADJDYNALLOC,
+      ARGEXTEND,
+
+      CMPICC,      // Compare two GPR operands, set icc.
+      CMPFCC,      // Compare two FP operands, set fcc.
+      BRICC,       // Branch to dest on icc condition.
+      BRFCC,       // Branch to dest on fcc condition.
+      SELECT_ICC,  // Select between two values using the current ICC flags.
+      SELECT_FCC,  // Select between two values using the current FCC flags.
+
+      Hi, Lo,      // Hi/Lo operations, typically on a global address.
+
+      FTOI,        // FP to Int within a FP register.
+      ITOF,        // Int to FP within a FP register.
+
+      CALL,        // A call instruction.
+      RET_FLAG,    // Return with a flag operand.
+      BR_JT,       // Jump table.
+      BARRIER,     // Memory barrier.
+      WrapperJT,
+      TC_RETURN
+    };
+  }
+
+  class HexagonTargetLowering : public TargetLowering {
+    int VarArgsFrameOffset;   // Frame offset to start of varargs area.
+ + bool CanReturnSmallStruct(const Function* CalleeFn, + unsigned& RetSize) const; + + public: + HexagonTargetMachine &TM; + explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine); + + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const + SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; + + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; + virtual bool isTruncateFree(EVT VT1, EVT VT2) const; + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + virtual const char *getTargetNodeName(unsigned Opcode) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SmallVectorImpl<SDValue> &OutVals, + SDValue Callee) const; + + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + virtual MachineBasicBlock + *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const; + + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + virtual EVT getSetCCResultType(EVT VT) const { + return MVT::i1; + } + + virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + // Intrinsics + virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const; + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. 
+    /// The type may be VoidTy, in which case only return true if the
+    /// addressing mode is legal for a load/store of any legal type.
+    /// TODO: Handle pre/postinc as well.
+    virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+    /// isLegalICmpImmediate - Return true if the specified immediate is a
+    /// legal icmp immediate, that is, the target has compare instructions
+    /// which can compare a register against the immediate without having to
+    /// materialize the immediate into a register.
+    virtual bool isLegalICmpImmediate(int64_t Imm) const;
+  };
+} // end namespace llvm
+
+#endif    // Hexagon_ISELLOWERING_H
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
new file mode 100644
index 0000000..1e3fcb8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonImmediates.td
@@ -0,0 +1,491 @@
+//=- HexagonImmediates.td - Hexagon immediate processing --*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// From IA64's InstrInfo file
+def s32Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s16Imm : Operand<i32> {
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s12Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_0Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_1Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_2Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_3Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s10Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s9Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm64 : Operand<i64> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s6Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_0Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_1Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_2Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_3Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u64Imm : Operand<i64> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u32Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_0Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_1Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_2Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u11_3Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u10Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u9Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u8Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u7Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_0Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_1Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_2Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_3Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u5Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u4Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u3Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def u2Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def n8Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+def m6Imm : Operand<i32> {
+  // For now, we use a generic print function for all operands.
+  let PrintMethod = "printHexagonImmOperand";
+}
+
+//
+// Immediate predicates
+//
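+// [Editorial note, not part of the original patch] The predicates below lean
+// on helpers from llvm/Support/MathExtras.h; for orientation:
+//   isInt<11>(v)           : -1024 <= v <= 1023
+//   isShiftedInt<11,2>(v)  : v is a multiple of 4 and v >> 2 fits isInt<11>
+//   isShiftedUInt<6,3>(v)  : v is a multiple of 8 and v >> 3 fits isUInt<6>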
+def s32ImmPred : PatLeaf<(i32 imm), [{
+  // s32ImmPred predicate - True if the immediate fits in a 32-bit signed
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+  // s32_24ImmPred predicate - True if the immediate is a multiple of
+  // 0x1000000 whose scaled value fits in a 32-bit signed field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+  // s32_16s8ImmPred predicate - True if the immediate is a multiple of
+  // 0x10000 whose scaled value fits in a 24-bit signed field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<24,16>(v);
+}]>;
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+  // s16ImmPred predicate - True if the immediate fits in a 16-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<16>(v);
+}]>;
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+  // s13ImmPred predicate - True if the immediate fits in a 13-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<13>(v);
+}]>;
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+  // s12ImmPred predicate - True if the immediate fits in a 12-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+  // s11_0ImmPred predicate - True if the immediate fits in an 11-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<11>(v);
+}]>;
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+  // s11_1ImmPred predicate - True if the immediate is a multiple of 2 whose
+  // scaled value fits in an 11-bit sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,1>(v);
+}]>;
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+  // s11_2ImmPred predicate - True if the immediate is a multiple of 4 whose
+  // scaled value fits in an 11-bit sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,2>(v);
+}]>;
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+  // s11_3ImmPred predicate - True if the immediate is a multiple of 8 whose
+  // scaled value fits in an 11-bit sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<11,3>(v);
+}]>;
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+  // s10ImmPred predicate - True if the immediate fits in a 10-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<10>(v);
+}]>;
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+  // s9ImmPred predicate - True if the immediate fits in a 9-bit sign-extended
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<9>(v);
+}]>;
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+  // s8ImmPred predicate - True if the immediate fits in an 8-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<8>(v);
+}]>;
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+  // s8Imm64Pred predicate - True if the immediate fits in an 8-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<8>(v);
+}]>;
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+  // s6ImmPred predicate - True if the immediate fits in a 6-bit sign-extended
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<6>(v);
+}]>;
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+  // s4_0ImmPred predicate - True if the immediate fits in a 4-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<4>(v);
+}]>;
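+// [Editorial note, not part of the original patch] Concrete ranges for the
+// scaled s4 predicates that follow: s4_1 accepts even values in [-16, 14],
+// s4_2 accepts multiples of 4 in [-32, 28], and s4_3 accepts multiples of 8
+// in [-64, 56].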
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+  // s4_1ImmPred predicate - True if the immediate is a multiple of 2 whose
+  // scaled value fits in a 4-bit sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<4,1>(v);
+}]>;
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+  // s4_2ImmPred predicate - True if the immediate is a multiple of 4 whose
+  // scaled value fits in a 4-bit sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<4,2>(v);
+}]>;
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+  // s4_3ImmPred predicate - True if the immediate is a multiple of 8 whose
+  // scaled value fits in a 4-bit sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<4,3>(v);
+}]>;
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+  // u64ImmPred predicate - Always true; any 64-bit immediate is accepted.
+  // The "N ||" is there to suppress a gcc unused-variable warning.
+  return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+  // u32ImmPred predicate - True if the immediate fits in a 32-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<32>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+  // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+  // u16_s8ImmPred predicate - True if the immediate is a multiple of 256
+  // whose scaled value fits in a 16-bit unsigned field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+  // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<9>(v);
+}]>;
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+  // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<8>(v);
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+  // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<7>(v);
+}]>;
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+  // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+  // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field. Same as u6ImmPred.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+  // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 1-bit aligned, i.e., a multiple of 2.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+  // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 2-bit aligned, i.e., a multiple of 4.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+  // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 3-bit aligned, i.e., a multiple of 8.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+  // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<5>(v);
+}]>;
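+// [Editorial sketch, not part of the original patch] How these PatLeaf
+// predicates are meant to be used: attaching one to an immediate in an
+// instruction pattern restricts selection to in-range constants. Hypothetical
+// definition, commented out (ALU32_ri and IntRegs are defined elsewhere in
+// this backend):
+/*
+def ADDri_example : ALU32_ri<(outs IntRegs:$dst),
+                             (ins IntRegs:$src, s16Imm:$imm),
+                             "$dst = add($src, #$imm)",
+                             [(set IntRegs:$dst,
+                                   (add IntRegs:$src, s16ImmPred:$imm))]>;
+*/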
+def u3ImmPred : PatLeaf<(i32 imm), [{
+  // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<3>(v);
+}]>;
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+  // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<2>(v);
+}]>;
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+  // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+  // field.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isUInt<1>(v);
+}]>;
+
+def m6ImmPred : PatLeaf<(i32 imm), [{
+  // m6ImmPred predicate - True if the immediate fits in a 6-bit signed
+  // field; used for negative (minus) immediates.
+  int64_t v = (int64_t)N->getSExtValue();
+  return isInt<6>(v);
+}]>;
+
+// InN means negative integers in [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+  // n8ImmPred predicate - True if the immediate is a negative integer in
+  // [-255, 0].
+  int64_t v = (int64_t)N->getSExtValue();
+  return (-255 <= v && v <= 0);
+}]>;
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 0000000..7e92776
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,242 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+                  string cstr,
+                  InstrItinClass itin> : Instruction {
+  field bits<32> Inst;
+
+  let Namespace = "Hexagon";
+
+/* Commented out for Hexagon
+  bits<2> op;
+  let Inst{31-30} = op; */         // Top two bits are the 'op' field.
+
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString = asmstr;
+  let Pattern = pattern;
+  let Constraints = cstr;
+  let Itinerary = itin;
+}
+
+//----------------------------------------------------------------------------//
+// Instruction Class Definitions +
+//----------------------------------------------------------------------------//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", LD> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<13> imm13;
+}
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+                 string cstr>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, LD> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+  bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", ST> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+                 string cstr>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, ST> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+  bits<13> imm13;
+}
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", ALU32> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+  bits<16> imm16;
+  bits<16> imm16_2;
+}
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", ALU64> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+  bits<16> imm16;
+  bits<16> imm16_2;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", M> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+                string cstr>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, M> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", S> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+                string cstr>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+}
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", J> {
+  bits<16> imm16;
+}
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", JR> {
+  bits<5> rs;
+  bits<5> pu; // Predicate register
+}
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", CR> {
+  bits<5> rs;
+  bits<10> imm10;
+}
+
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO>;
+
+
+//----------------------------------------------------------------------------//
+// End of Instruction Classes Definitions
+//----------------------------------------------------------------------------//
+
+
+//
+// ALU32 patterns
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : ALU32Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : ALU32Type<outs, ins, asmstr, pattern> {
+  let rt{0-4} = 0;
+}
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : ALU32Type<outs, ins, asmstr, pattern> {
+  let rt{0-4} = 0;
+}
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : ALU32Type<outs, ins, asmstr, pattern> {
+  let rt{0-4} = 0;
+}
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : ALU64Type<outs, ins, asmstr, pattern> {
+}
+
+// J Type Instructions.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : JType<outs, ins, asmstr, pattern> {
+}
+
+// JR type Instructions.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : JRType<outs, ins, asmstr, pattern> {
+}
+
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr>
+  : STInstPost<outs, ins, asmstr, pattern, cstr> {
+  let rt{0-4} = 0;
+}
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern,
+               string cstr>
+  : LDInstPost<outs, ins, asmstr, pattern, cstr> {
+  let rt{0-4} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// End of V4 Instruction Format Definitions
+//===----------------------------------------------------------------------===//
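The ALU32_ir/ALU32_ri/ALU32_ii and *InstPI classes above use TableGen slice assignments ("let rt{0-4} = 0;") to pin the register field that immediate forms do not use to all zeros in the encoding. A rough C++ analogy of that bit-slice operation follows; this is an editorial illustration of the semantics only, not LLVM's actual TableGen-generated encoder:

    #include <cassert>
    #include <cstdint>

    // Set bits [Hi:Lo] of a 32-bit instruction word to Val, mirroring what a
    // TableGen slice assignment does to a bits<N> field of the record.
    static uint32_t setBits(uint32_t Inst, unsigned Hi, unsigned Lo,
                            uint32_t Val) {
      uint32_t Mask = ((1u << (Hi - Lo + 1)) - 1) << Lo;
      return (Inst & ~Mask) | ((Val << Lo) & Mask);
    }

    int main() {
      uint32_t Inst = 0xFFFFFFFFu;
      Inst = setBits(Inst, 4, 0, 0);   // rt{0-4} = 0 for an immediate form
      assert((Inst & 0x1Fu) == 0);     // the 5-bit rt field is now all zeros
      return 0;
    }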
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 0000000..bd5e449
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,46 @@
+//==- HexagonInstrFormatsV4.td - Hexagon V4 Instruction Formats -*- tablegen -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// NV type instructions.
+//
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<13> imm13;
+}
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+                    string cstr>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+  bits<13> imm13;
+}
+
+// Post increment new-value ST Instruction.
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+                  string cstr>
+  : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
+  let rt{0-4} = 0;
+}
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<6> imm6;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 0000000..69a50d7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,1459 @@
+//=- HexagonInstrInfo.cpp - Hexagon Instruction Information -------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#define GET_INSTRINFO_CTOR
+#include "HexagonGenInstrInfo.inc"
+
+using namespace llvm;
+
+///
+/// Constants for Hexagon instructions.
+///
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
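+
+// Editorial sketch (not part of the original patch): each MIN/MAX pair above
+// is simply the signed range of the offset field for that access size. For
+// example, word accesses use a 13-bit signed offset, so
+// Hexagon_MEMW_OFFSET_MIN/MAX span [-4096, 4095]:
+//
+//   r0 = memw(r1 + #4092)   // encodable: in range and 4-byte aligned
+//   r0 = memw(r1 + #8000)   // not encodable: isValidOffset() below returns
+//                           // false and an ADD_ri materializes the address
+//
+// The AUTOINC pairs are the analogous ranges for post-increment forms,
+// scaled by the access size (e.g. [-64, 56] in steps of 8 for memd, and
+// [-32, 28] in steps of 4 for memw).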
+
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+  : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+    RI(ST, *this), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::LDriw:
+  case Hexagon::LDrid:
+  case Hexagon::LDrih:
+  case Hexagon::LDrib:
+  case Hexagon::LDriub:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(2).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::STriw:
+  case Hexagon::STrid:
+  case Hexagon::STrih:
+  case Hexagon::STrib:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(2).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return 0;
+}
+
+
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                               MachineBasicBlock *FBB,
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               DebugLoc DL) const {
+  int BOpc = Hexagon::JMP;
+  int BccOpc = Hexagon::JMP_Pred;
+
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  int regPos = 0;
+  // Check if ReverseBranchCondition has asked to reverse this branch.
+  // If we want to reverse the branch an odd number of times, we want
+  // JMP_PredNot.
+  if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+    BccOpc = Hexagon::JMP_PredNot;
+    regPos = 1;
+  }
+
+  if (FBB == 0) {
+    if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need to add a
+      // special case handling of a predicated jump followed by an
+      // unconditional jump. If not, Tail Merging and CFG Optimization go
+      // into an infinite loop.
+      MachineBasicBlock *NewTBB, *NewFBB;
+      SmallVector<MachineOperand, 4> Cond;
+      MachineInstr *Term = MBB.getFirstTerminator();
+      if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
+                                               false)) {
+        MachineBasicBlock *NextBB =
+          llvm::next(MachineFunction::iterator(&MBB));
+        if (NewTBB == NextBB) {
+          ReverseBranchCondition(Cond);
+          RemoveBranch(MBB);
+          return InsertBranch(MBB, TBB, 0, Cond, DL);
+        }
+      }
+      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+    } else {
+      BuildMI(&MBB, DL,
+              get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+    }
+    return 1;
+  }
+
+  BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+  return 2;
+}
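+
+// Editorial sketch of the condition-vector convention shared by
+// InsertBranch, AnalyzeBranch, and ReverseBranchCondition in this file
+// (this summarizes the code above and below; it adds no new functionality):
+//
+//   Cond == {}             unconditional:        JMP
+//   Cond == {pred}         branch if pred true:  JMP_Pred
+//   Cond == {imm(0), pred} branch if pred false: JMP_PredNot
+//
+// ReverseBranchCondition flips between the last two forms by inserting or
+// erasing the leading immediate 0; InsertBranch recognizes that marker via
+// "Cond[0].isImm() && Cond[0].getImm() == 0" and selects JMP_PredNot.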
+
+
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  FBB = NULL;
+
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return false;
+
+  // A basic block may look like this:
+  //
+  //  [  insn
+  //     EH_LABEL
+  //     insn
+  //     insn
+  //     insn
+  //     EH_LABEL
+  //     insn  ]
+  //
+  // It has two successors but no terminator; we don't know how to handle it.
+  do {
+    --I;
+    if (I->isEHLabel())
+      return true;
+  } while (I != MBB.begin());
+
+  I = MBB.end();
+  --I;
+
+  while (I->isDebugValue()) {
+    if (I == MBB.begin())
+      return false;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = I;
+
+  // If there is only one terminator instruction, process it.
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+    if (LastInst->getOpcode() == Hexagon::JMP) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+    if (LastInst->getOpcode() == Hexagon::JMP_Pred) {
+      // Block ends with fall-through true condbranch.
+      TBB = LastInst->getOperand(1).getMBB();
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    if (LastInst->getOpcode() == Hexagon::JMP_PredNot) {
+      // Block ends with fall-through false condbranch.
+      TBB = LastInst->getOperand(1).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(0));
+      Cond.push_back(LastInst->getOperand(0));
+      return false;
+    }
+    // Otherwise, don't know what this is.
+    return true;
+  }
+
+  // Get the instruction before it if it's a terminator.
+  MachineInstr *SecondLastInst = I;
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() &&
+      isUnpredicatedTerminator(--I))
+    return true;
+
+  // If the block ends with Hexagon::BRCOND and Hexagon::JMP, handle it.
+  if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
+       (SecondLastInst->getOpcode() == Hexagon::JMP_Pred)) &&
+      LastInst->getOpcode() == Hexagon::JMP) {
+    TBB = SecondLastInst->getOperand(1).getMBB();
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with Hexagon::JMP_PredNot and Hexagon::JMP, handle it.
+  if ((SecondLastInst->getOpcode() == Hexagon::JMP_PredNot) &&
+      LastInst->getOpcode() == Hexagon::JMP) {
+    TBB = SecondLastInst->getOperand(1).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(0));
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with two Hexagon::JMPs, handle it. The second one is
+  // not executed, so remove it.
+  if (SecondLastInst->getOpcode() == Hexagon::JMP &&
+      LastInst->getOpcode() == Hexagon::JMP) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return false;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  int BOpc = Hexagon::JMP;
+  int BccOpc = Hexagon::JMP_Pred;
+  int BccOpcNot = Hexagon::JMP_PredNot;
+
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin()) return 0;
+  --I;
+  if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
+      I->getOpcode() != BccOpcNot)
+    return 0;
+
+  // Remove the branch.
+  I->eraseFromParent();
+
+  I = MBB.end();
+
+  if (I == MBB.begin()) return 1;
+  --I;
+  if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
+    return 1;
+
+  // Remove the branch.
+ I->eraseFromParent(); + return 2; +} + + +void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg); + return; + } + if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) { + // Map Pd = Ps to Pd = or(Ps, Ps). + BuildMI(MBB, I, DL, get(Hexagon::OR_pp), + DestReg).addReg(SrcReg).addReg(SrcReg); + return; + } + if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) { + // We can have an overlap between single and double reg: r1:0 = r0. + if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) { + // r1:0 = r0 + BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } else { + // r1:0 = r1 or no overlap. + BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg, + Hexagon::subreg_loreg))).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg, + Hexagon::subreg_hireg))).addImm(0); + } + return; + } + if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg); + return; + } + + assert (0 && "Unimplemented"); +} + + +void HexagonInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOStore, + MFI.getObjectSize(FI), + Align); + + if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriw)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STrid)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Hexagon::STriw_pred)) + .addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + } else { + assert(0 && "Unimplemented"); + } +} + + +void HexagonInstrInfo::storeRegToAddr( + MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const +{ + assert(0 && "Unimplemented"); + return; +} + + +void HexagonInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(I); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + Align); + + if (RC == 
Hexagon::IntRegsRegisterClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == Hexagon::DoubleRegsRegisterClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == Hexagon::PredRegsRegisterClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else {
+    assert(0 && "Can't load this register from a stack slot");
+  }
+}
+
+
+void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                                       SmallVectorImpl<MachineOperand> &Addr,
+                                       const TargetRegisterClass *RC,
+                                       SmallVectorImpl<MachineInstr*> &NewMIs)
+                                       const {
+  assert(0 && "Unimplemented");
+}
+
+
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+                                            MachineInstr* MI,
+                                            const SmallVectorImpl<unsigned> &Ops,
+                                            int FI) const {
+  // Hexagon_TODO: Implement.
+  return 0;
+}
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *TRC;
+  if (VT == MVT::i1) {
+    TRC = Hexagon::PredRegsRegisterClass;
+  } else if (VT == MVT::i32) {
+    TRC = Hexagon::IntRegsRegisterClass;
+  } else if (VT == MVT::i64) {
+    TRC = Hexagon::DoubleRegsRegisterClass;
+  } else {
+    assert(0 && "Cannot handle this register class");
+  }
+
+  unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+  return NewReg;
+}
+
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+  bool isPred = MI->getDesc().isPredicable();
+
+  if (!isPred)
+    return false;
+
+  const int Opc = MI->getOpcode();
+
+  switch(Opc) {
+  case Hexagon::TFRI:
+    return isInt<12>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrid:
+  case Hexagon::STrid_indexed:
+    return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+  case Hexagon::STriw:
+  case Hexagon::STriw_indexed:
+  case Hexagon::STriw_nv_V4:
+    return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrih:
+  case Hexagon::STrih_indexed:
+  case Hexagon::STrih_nv_V4:
+    return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+  case Hexagon::STrib:
+  case Hexagon::STrib_indexed:
+  case Hexagon::STrib_nv_V4:
+    return isUInt<6>(MI->getOperand(1).getImm());
+
+  case Hexagon::LDrid:
+  case Hexagon::LDrid_indexed:
+    return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDriw:
+  case Hexagon::LDriw_indexed:
+    return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDrih:
+  case Hexagon::LDriuh:
+  case Hexagon::LDrih_indexed:
+  case Hexagon::LDriuh_indexed:
+    return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+  case Hexagon::LDrib:
+  case Hexagon::LDriub:
+  case Hexagon::LDrib_indexed:
+  case Hexagon::LDriub_indexed:
+    return isUInt<6>(MI->getOperand(2).getImm());
+
+  case Hexagon::POST_LDrid:
+    return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDriw:
+    return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDrih:
+  case Hexagon::POST_LDriuh:
+    return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+  case Hexagon::POST_LDrib:
+  case Hexagon::POST_LDriub:
+    return isInt<4>(MI->getOperand(3).getImm());
+
+  case Hexagon::STrib_imm_V4:
+  case Hexagon::STrih_imm_V4:
+  case Hexagon::STriw_imm_V4:
+    return (isUInt<6>(MI->getOperand(1).getImm()) &&
+            isInt<6>(MI->getOperand(2).getImm()));
+
+  case Hexagon::ADD_ri:
+    return isInt<8>(MI->getOperand(2).getImm());
+
+  case Hexagon::ASLH:
+  case Hexagon::ASRH:
+  case Hexagon::SXTB:
+  case Hexagon::SXTH:
+  case Hexagon::ZXTB:
+  case Hexagon::ZXTH:
+    return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+  case Hexagon::JMPR:
+    return false;
+
+  default:
+    return true;
+  }
+
+  return true;
+}
+
+
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+  switch(Opc) {
+  case Hexagon::TFR:
+    return !invertPredicate ? Hexagon::TFR_cPt :
+                              Hexagon::TFR_cNotPt;
+  case Hexagon::TFRI:
+    return !invertPredicate ? Hexagon::TFRI_cPt :
+                              Hexagon::TFRI_cNotPt;
+  case Hexagon::JMP:
+    return !invertPredicate ? Hexagon::JMP_Pred :
+                              Hexagon::JMP_PredNot;
+  case Hexagon::ADD_ri:
+    return !invertPredicate ? Hexagon::ADD_ri_cPt :
+                              Hexagon::ADD_ri_cNotPt;
+  case Hexagon::ADD_rr:
+    return !invertPredicate ? Hexagon::ADD_rr_cPt :
+                              Hexagon::ADD_rr_cNotPt;
+  case Hexagon::XOR_rr:
+    return !invertPredicate ? Hexagon::XOR_rr_cPt :
+                              Hexagon::XOR_rr_cNotPt;
+  case Hexagon::AND_rr:
+    return !invertPredicate ? Hexagon::AND_rr_cPt :
+                              Hexagon::AND_rr_cNotPt;
+  case Hexagon::OR_rr:
+    return !invertPredicate ? Hexagon::OR_rr_cPt :
+                              Hexagon::OR_rr_cNotPt;
+  case Hexagon::SUB_rr:
+    return !invertPredicate ? Hexagon::SUB_rr_cPt :
+                              Hexagon::SUB_rr_cNotPt;
+  case Hexagon::COMBINE_rr:
+    return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+                              Hexagon::COMBINE_rr_cNotPt;
+  case Hexagon::ASLH:
+    return !invertPredicate ? Hexagon::ASLH_cPt_V4 :
+                              Hexagon::ASLH_cNotPt_V4;
+  case Hexagon::ASRH:
+    return !invertPredicate ? Hexagon::ASRH_cPt_V4 :
+                              Hexagon::ASRH_cNotPt_V4;
+  case Hexagon::SXTB:
+    return !invertPredicate ? Hexagon::SXTB_cPt_V4 :
+                              Hexagon::SXTB_cNotPt_V4;
+  case Hexagon::SXTH:
+    return !invertPredicate ? Hexagon::SXTH_cPt_V4 :
+                              Hexagon::SXTH_cNotPt_V4;
+  case Hexagon::ZXTB:
+    return !invertPredicate ? Hexagon::ZXTB_cPt_V4 :
+                              Hexagon::ZXTB_cNotPt_V4;
+  case Hexagon::ZXTH:
+    return !invertPredicate ? Hexagon::ZXTH_cPt_V4 :
+                              Hexagon::ZXTH_cNotPt_V4;
+
+  case Hexagon::JMPR:
+    return !invertPredicate ? Hexagon::JMPR_cPt :
+                              Hexagon::JMPR_cNotPt;
+
+  // V4 indexed+scaled load.
+  case Hexagon::LDrid_indexed_V4:
+    return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
+                              Hexagon::LDrid_indexed_cNotPt_V4;
+  case Hexagon::LDrid_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
+                              Hexagon::LDrid_indexed_shl_cNotPt_V4;
+  case Hexagon::LDrib_indexed_V4:
+    return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
+                              Hexagon::LDrib_indexed_cNotPt_V4;
+  case Hexagon::LDriub_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+                              Hexagon::LDriub_indexed_cNotPt_V4;
+  case Hexagon::LDriub_ae_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+                              Hexagon::LDriub_indexed_cNotPt_V4;
+  case Hexagon::LDrib_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
+                              Hexagon::LDrib_indexed_shl_cNotPt_V4;
+  case Hexagon::LDriub_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+                              Hexagon::LDriub_indexed_shl_cNotPt_V4;
+  case Hexagon::LDriub_ae_indexed_shl_V4:
+    return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+                              Hexagon::LDriub_indexed_shl_cNotPt_V4;
+  case Hexagon::LDrih_indexed_V4:
+    return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
+                              Hexagon::LDrih_indexed_cNotPt_V4;
+  case Hexagon::LDriuh_indexed_V4:
+    return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+                              Hexagon::LDriuh_indexed_cNotPt_V4;
+  case Hexagon::LDriuh_ae_indexed_V4:
+    return !invertPredicate ?
Hexagon::LDriuh_indexed_cPt_V4 : + Hexagon::LDriuh_indexed_cNotPt_V4; + case Hexagon::LDrih_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 : + Hexagon::LDrih_indexed_shl_cNotPt_V4; + case Hexagon::LDriuh_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : + Hexagon::LDriuh_indexed_shl_cNotPt_V4; + case Hexagon::LDriuh_ae_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 : + Hexagon::LDriuh_indexed_shl_cNotPt_V4; + case Hexagon::LDriw_indexed_V4: + return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 : + Hexagon::LDriw_indexed_cNotPt_V4; + case Hexagon::LDriw_indexed_shl_V4: + return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 : + Hexagon::LDriw_indexed_shl_cNotPt_V4; + // Byte. + case Hexagon::POST_STbri: + return !invertPredicate ? Hexagon::POST_STbri_cPt : + Hexagon::POST_STbri_cNotPt; + case Hexagon::STrib: + return !invertPredicate ? Hexagon::STrib_cPt : + Hexagon::STrib_cNotPt; + case Hexagon::STrib_indexed: + return !invertPredicate ? Hexagon::STrib_indexed_cPt : + Hexagon::STrib_indexed_cNotPt; + case Hexagon::STrib_imm_V4: + return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 : + Hexagon::STrib_imm_cNotPt_V4; + case Hexagon::STrib_indexed_shl_V4: + return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 : + Hexagon::STrib_indexed_shl_cNotPt_V4; + // Halfword. + case Hexagon::POST_SThri: + return !invertPredicate ? Hexagon::POST_SThri_cPt : + Hexagon::POST_SThri_cNotPt; + case Hexagon::STrih: + return !invertPredicate ? Hexagon::STrih_cPt : + Hexagon::STrih_cNotPt; + case Hexagon::STrih_indexed: + return !invertPredicate ? Hexagon::STrih_indexed_cPt : + Hexagon::STrih_indexed_cNotPt; + case Hexagon::STrih_imm_V4: + return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 : + Hexagon::STrih_imm_cNotPt_V4; + case Hexagon::STrih_indexed_shl_V4: + return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 : + Hexagon::STrih_indexed_shl_cNotPt_V4; + // Word. + case Hexagon::POST_STwri: + return !invertPredicate ? Hexagon::POST_STwri_cPt : + Hexagon::POST_STwri_cNotPt; + case Hexagon::STriw: + return !invertPredicate ? Hexagon::STriw_cPt : + Hexagon::STriw_cNotPt; + case Hexagon::STriw_indexed: + return !invertPredicate ? Hexagon::STriw_indexed_cPt : + Hexagon::STriw_indexed_cNotPt; + case Hexagon::STriw_indexed_shl_V4: + return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 : + Hexagon::STriw_indexed_shl_cNotPt_V4; + case Hexagon::STriw_imm_V4: + return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 : + Hexagon::STriw_imm_cNotPt_V4; + // Double word. + case Hexagon::POST_STdri: + return !invertPredicate ? Hexagon::POST_STdri_cPt : + Hexagon::POST_STdri_cNotPt; + case Hexagon::STrid: + return !invertPredicate ? Hexagon::STrid_cPt : + Hexagon::STrid_cNotPt; + case Hexagon::STrid_indexed: + return !invertPredicate ? Hexagon::STrid_indexed_cPt : + Hexagon::STrid_indexed_cNotPt; + case Hexagon::STrid_indexed_shl_V4: + return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 : + Hexagon::STrid_indexed_shl_cNotPt_V4; + // Load. + case Hexagon::LDrid: + return !invertPredicate ? Hexagon::LDrid_cPt : + Hexagon::LDrid_cNotPt; + case Hexagon::LDriw: + return !invertPredicate ? Hexagon::LDriw_cPt : + Hexagon::LDriw_cNotPt; + case Hexagon::LDrih: + return !invertPredicate ? Hexagon::LDrih_cPt : + Hexagon::LDrih_cNotPt; + case Hexagon::LDriuh: + return !invertPredicate ? Hexagon::LDriuh_cPt : + Hexagon::LDriuh_cNotPt; + case Hexagon::LDrib: + return !invertPredicate ? 
Hexagon::LDrib_cPt : + Hexagon::LDrib_cNotPt; + case Hexagon::LDriub: + return !invertPredicate ? Hexagon::LDriub_cPt : + Hexagon::LDriub_cNotPt; + case Hexagon::LDriubit: + return !invertPredicate ? Hexagon::LDriub_cPt : + Hexagon::LDriub_cNotPt; + // Load Indexed. + case Hexagon::LDrid_indexed: + return !invertPredicate ? Hexagon::LDrid_indexed_cPt : + Hexagon::LDrid_indexed_cNotPt; + case Hexagon::LDriw_indexed: + return !invertPredicate ? Hexagon::LDriw_indexed_cPt : + Hexagon::LDriw_indexed_cNotPt; + case Hexagon::LDrih_indexed: + return !invertPredicate ? Hexagon::LDrih_indexed_cPt : + Hexagon::LDrih_indexed_cNotPt; + case Hexagon::LDriuh_indexed: + return !invertPredicate ? Hexagon::LDriuh_indexed_cPt : + Hexagon::LDriuh_indexed_cNotPt; + case Hexagon::LDrib_indexed: + return !invertPredicate ? Hexagon::LDrib_indexed_cPt : + Hexagon::LDrib_indexed_cNotPt; + case Hexagon::LDriub_indexed: + return !invertPredicate ? Hexagon::LDriub_indexed_cPt : + Hexagon::LDriub_indexed_cNotPt; + // Post Increment Load. + case Hexagon::POST_LDrid: + return !invertPredicate ? Hexagon::POST_LDrid_cPt : + Hexagon::POST_LDrid_cNotPt; + case Hexagon::POST_LDriw: + return !invertPredicate ? Hexagon::POST_LDriw_cPt : + Hexagon::POST_LDriw_cNotPt; + case Hexagon::POST_LDrih: + return !invertPredicate ? Hexagon::POST_LDrih_cPt : + Hexagon::POST_LDrih_cNotPt; + case Hexagon::POST_LDriuh: + return !invertPredicate ? Hexagon::POST_LDriuh_cPt : + Hexagon::POST_LDriuh_cNotPt; + case Hexagon::POST_LDrib: + return !invertPredicate ? Hexagon::POST_LDrib_cPt : + Hexagon::POST_LDrib_cNotPt; + case Hexagon::POST_LDriub: + return !invertPredicate ? Hexagon::POST_LDriub_cPt : + Hexagon::POST_LDriub_cNotPt; + // DEALLOC_RETURN. + case Hexagon::DEALLOC_RET_V4: + return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 : + Hexagon::DEALLOC_RET_cNotPt_V4; + default: + assert(false && "Unexpected predicable instruction"); + } +} + + +bool HexagonInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Cond) const { + int Opc = MI->getOpcode(); + assert (isPredicable(MI) && "Expected predicable instruction"); + bool invertJump = (!Cond.empty() && Cond[0].isImm() && + (Cond[0].getImm() == 0)); + MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump))); + // + // This assumes that the predicate is always the first operand + // in the set of inputs. + // + MI->addOperand(MI->getOperand(MI->getNumOperands()-1)); + int oper; + for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) { + MachineOperand MO = MI->getOperand(oper); + if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) { + break; + } + + if (MO.isReg()) { + MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(), + MO.isImplicit(), MO.isKill(), + MO.isDead(), MO.isUndef(), + MO.isDebug()); + } else if (MO.isImm()) { + MI->getOperand(oper+1).ChangeToImmediate(MO.getImm()); + } else { + assert(false && "Unexpected operand type"); + } + } + + int regPos = invertJump ? 
1 : 0; + MachineOperand PredMO = Cond[regPos]; + MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(), + PredMO.isImplicit(), PredMO.isKill(), + PredMO.isDead(), PredMO.isUndef(), + PredMO.isDebug()); + + return true; +} + + +bool +HexagonInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCyles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const { + return true; +} + + +bool +HexagonInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, + unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, + unsigned ExtraFCycles, + const BranchProbability &Probability) const { + return true; +} + + +bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::TFR_cPt: + case Hexagon::TFR_cNotPt: + case Hexagon::TFRI_cPt: + case Hexagon::TFRI_cNotPt: + case Hexagon::TFR_cdnPt: + case Hexagon::TFR_cdnNotPt: + case Hexagon::TFRI_cdnPt: + case Hexagon::TFRI_cdnNotPt: + return true; + + case Hexagon::JMP_Pred: + case Hexagon::JMP_PredNot: + case Hexagon::BRCOND: + case Hexagon::JMP_PredPt: + case Hexagon::JMP_PredNotPt: + case Hexagon::JMP_PredPnt: + case Hexagon::JMP_PredNotPnt: + return true; + + case Hexagon::LDrid_indexed_cPt_V4 : + case Hexagon::LDrid_indexed_cdnPt_V4 : + case Hexagon::LDrid_indexed_cNotPt_V4 : + case Hexagon::LDrid_indexed_cdnNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cPt_V4 : + case Hexagon::LDrid_indexed_shl_cdnPt_V4 : + case Hexagon::LDrid_indexed_shl_cNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDrib_indexed_cPt_V4 : + case Hexagon::LDrib_indexed_cdnPt_V4 : + case Hexagon::LDrib_indexed_cNotPt_V4 : + case Hexagon::LDrib_indexed_cdnNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cPt_V4 : + case Hexagon::LDrib_indexed_shl_cdnPt_V4 : + case Hexagon::LDrib_indexed_shl_cNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriub_indexed_cPt_V4 : + case Hexagon::LDriub_indexed_cdnPt_V4 : + case Hexagon::LDriub_indexed_cNotPt_V4 : + case Hexagon::LDriub_indexed_cdnNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cPt_V4 : + case Hexagon::LDriub_indexed_shl_cdnPt_V4 : + case Hexagon::LDriub_indexed_shl_cNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDrih_indexed_cPt_V4 : + case Hexagon::LDrih_indexed_cdnPt_V4 : + case Hexagon::LDrih_indexed_cNotPt_V4 : + case Hexagon::LDrih_indexed_cdnNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cPt_V4 : + case Hexagon::LDrih_indexed_shl_cdnPt_V4 : + case Hexagon::LDrih_indexed_shl_cNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriuh_indexed_cPt_V4 : + case Hexagon::LDriuh_indexed_cdnPt_V4 : + case Hexagon::LDriuh_indexed_cNotPt_V4 : + case Hexagon::LDriuh_indexed_cdnNotPt_V4 : + case Hexagon::LDriuh_indexed_shl_cPt_V4 : + case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : + case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : + case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : + case Hexagon::LDriw_indexed_cPt_V4 : + case Hexagon::LDriw_indexed_cdnPt_V4 : + case Hexagon::LDriw_indexed_cNotPt_V4 : + case Hexagon::LDriw_indexed_cdnNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cPt_V4 : + case Hexagon::LDriw_indexed_shl_cdnPt_V4 : + case Hexagon::LDriw_indexed_shl_cNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : + return true; + + case Hexagon::LDrid_cPt : + case Hexagon::LDrid_cNotPt : + case Hexagon::LDrid_indexed_cPt : + case Hexagon::LDrid_indexed_cNotPt : + case Hexagon::POST_LDrid_cPt : + case 
Hexagon::POST_LDrid_cNotPt : + case Hexagon::LDriw_cPt : + case Hexagon::LDriw_cNotPt : + case Hexagon::LDriw_indexed_cPt : + case Hexagon::LDriw_indexed_cNotPt : + case Hexagon::POST_LDriw_cPt : + case Hexagon::POST_LDriw_cNotPt : + case Hexagon::LDrih_cPt : + case Hexagon::LDrih_cNotPt : + case Hexagon::LDrih_indexed_cPt : + case Hexagon::LDrih_indexed_cNotPt : + case Hexagon::POST_LDrih_cPt : + case Hexagon::POST_LDrih_cNotPt : + case Hexagon::LDrib_cPt : + case Hexagon::LDrib_cNotPt : + case Hexagon::LDrib_indexed_cPt : + case Hexagon::LDrib_indexed_cNotPt : + case Hexagon::POST_LDrib_cPt : + case Hexagon::POST_LDrib_cNotPt : + case Hexagon::LDriuh_cPt : + case Hexagon::LDriuh_cNotPt : + case Hexagon::LDriuh_indexed_cPt : + case Hexagon::LDriuh_indexed_cNotPt : + case Hexagon::POST_LDriuh_cPt : + case Hexagon::POST_LDriuh_cNotPt : + case Hexagon::LDriub_cPt : + case Hexagon::LDriub_cNotPt : + case Hexagon::LDriub_indexed_cPt : + case Hexagon::LDriub_indexed_cNotPt : + case Hexagon::POST_LDriub_cPt : + case Hexagon::POST_LDriub_cNotPt : + return true; + + case Hexagon::LDrid_cdnPt : + case Hexagon::LDrid_cdnNotPt : + case Hexagon::LDrid_indexed_cdnPt : + case Hexagon::LDrid_indexed_cdnNotPt : + case Hexagon::POST_LDrid_cdnPt_V4 : + case Hexagon::POST_LDrid_cdnNotPt_V4 : + case Hexagon::LDriw_cdnPt : + case Hexagon::LDriw_cdnNotPt : + case Hexagon::LDriw_indexed_cdnPt : + case Hexagon::LDriw_indexed_cdnNotPt : + case Hexagon::POST_LDriw_cdnPt_V4 : + case Hexagon::POST_LDriw_cdnNotPt_V4 : + case Hexagon::LDrih_cdnPt : + case Hexagon::LDrih_cdnNotPt : + case Hexagon::LDrih_indexed_cdnPt : + case Hexagon::LDrih_indexed_cdnNotPt : + case Hexagon::POST_LDrih_cdnPt_V4 : + case Hexagon::POST_LDrih_cdnNotPt_V4 : + case Hexagon::LDrib_cdnPt : + case Hexagon::LDrib_cdnNotPt : + case Hexagon::LDrib_indexed_cdnPt : + case Hexagon::LDrib_indexed_cdnNotPt : + case Hexagon::POST_LDrib_cdnPt_V4 : + case Hexagon::POST_LDrib_cdnNotPt_V4 : + case Hexagon::LDriuh_cdnPt : + case Hexagon::LDriuh_cdnNotPt : + case Hexagon::LDriuh_indexed_cdnPt : + case Hexagon::LDriuh_indexed_cdnNotPt : + case Hexagon::POST_LDriuh_cdnPt_V4 : + case Hexagon::POST_LDriuh_cdnNotPt_V4 : + case Hexagon::LDriub_cdnPt : + case Hexagon::LDriub_cdnNotPt : + case Hexagon::LDriub_indexed_cdnPt : + case Hexagon::LDriub_indexed_cdnNotPt : + case Hexagon::POST_LDriub_cdnPt_V4 : + case Hexagon::POST_LDriub_cdnNotPt_V4 : + return true; + + case Hexagon::ADD_ri_cPt: + case Hexagon::ADD_ri_cNotPt: + case Hexagon::ADD_ri_cdnPt: + case Hexagon::ADD_ri_cdnNotPt: + case Hexagon::ADD_rr_cPt: + case Hexagon::ADD_rr_cNotPt: + case Hexagon::ADD_rr_cdnPt: + case Hexagon::ADD_rr_cdnNotPt: + case Hexagon::XOR_rr_cPt: + case Hexagon::XOR_rr_cNotPt: + case Hexagon::XOR_rr_cdnPt: + case Hexagon::XOR_rr_cdnNotPt: + case Hexagon::AND_rr_cPt: + case Hexagon::AND_rr_cNotPt: + case Hexagon::AND_rr_cdnPt: + case Hexagon::AND_rr_cdnNotPt: + case Hexagon::OR_rr_cPt: + case Hexagon::OR_rr_cNotPt: + case Hexagon::OR_rr_cdnPt: + case Hexagon::OR_rr_cdnNotPt: + case Hexagon::SUB_rr_cPt: + case Hexagon::SUB_rr_cNotPt: + case Hexagon::SUB_rr_cdnPt: + case Hexagon::SUB_rr_cdnNotPt: + case Hexagon::COMBINE_rr_cPt: + case Hexagon::COMBINE_rr_cNotPt: + case Hexagon::COMBINE_rr_cdnPt: + case Hexagon::COMBINE_rr_cdnNotPt: + return true; + + case Hexagon::ASLH_cPt_V4: + case Hexagon::ASLH_cNotPt_V4: + case Hexagon::ASRH_cPt_V4: + case Hexagon::ASRH_cNotPt_V4: + case Hexagon::SXTB_cPt_V4: + case Hexagon::SXTB_cNotPt_V4: + case Hexagon::SXTH_cPt_V4: + case Hexagon::SXTH_cNotPt_V4: 
+ case Hexagon::ZXTB_cPt_V4: + case Hexagon::ZXTB_cNotPt_V4: + case Hexagon::ZXTH_cPt_V4: + case Hexagon::ZXTH_cNotPt_V4: + return true; + + case Hexagon::ASLH_cdnPt_V4: + case Hexagon::ASLH_cdnNotPt_V4: + case Hexagon::ASRH_cdnPt_V4: + case Hexagon::ASRH_cdnNotPt_V4: + case Hexagon::SXTB_cdnPt_V4: + case Hexagon::SXTB_cdnNotPt_V4: + case Hexagon::SXTH_cdnPt_V4: + case Hexagon::SXTH_cdnNotPt_V4: + case Hexagon::ZXTB_cdnPt_V4: + case Hexagon::ZXTB_cdnNotPt_V4: + case Hexagon::ZXTH_cdnPt_V4: + case Hexagon::ZXTH_cdnNotPt_V4: + return true; + + default: + return false; + } +} + + +bool +HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { + MachineOperand MO = MI->getOperand(oper); + if (MO.isReg() && MO.isDef()) { + const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg()); + if (RC == Hexagon::PredRegsRegisterClass) { + Pred.push_back(MO); + return true; + } + } + } + return false; +} + + +bool +HexagonInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + // TODO: Fix this + return false; +} + + +// +// We indicate that we want to reverse the branch by +// inserting a 0 at the beginning of the Cond vector. +// +bool HexagonInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) { + Cond.erase(Cond.begin()); + } else { + Cond.insert(Cond.begin(), MachineOperand::CreateImm(0)); + } + return false; +} + + +bool HexagonInstrInfo:: +isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, + const BranchProbability &Probability) const { + return (NumInstrs <= 4); +} + +bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::DEALLOC_RET_V4 : + case Hexagon::DEALLOC_RET_cPt_V4 : + case Hexagon::DEALLOC_RET_cNotPt_V4 : + case Hexagon::DEALLOC_RET_cdnPnt_V4 : + case Hexagon::DEALLOC_RET_cNotdnPnt_V4 : + case Hexagon::DEALLOC_RET_cdnPt_V4 : + case Hexagon::DEALLOC_RET_cNotdnPt_V4 : + return true; + } + return false; +} + + +bool HexagonInstrInfo:: +isValidOffset(const int Opcode, const int Offset) const { + // This function is to check whether the "Offset" is in the correct range of + // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is + // inserted to calculate the final address. Due to this reason, the function + // assumes that the "Offset" has correct alignment. 
+
+  switch(Opcode) {
+
+  case Hexagon::LDriw:
+  case Hexagon::STriw:
+  case Hexagon::STriwt:
+    assert((Offset % 4 == 0) && "Offset has incorrect alignment");
+    return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+  case Hexagon::LDrid:
+  case Hexagon::STrid:
+    assert((Offset % 8 == 0) && "Offset has incorrect alignment");
+    return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+  case Hexagon::LDrih:
+  case Hexagon::LDriuh:
+  case Hexagon::STrih:
+  case Hexagon::LDrih_ae:
+    assert((Offset % 2 == 0) && "Offset has incorrect alignment");
+    return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+  case Hexagon::LDrib:
+  case Hexagon::STrib:
+  case Hexagon::LDriub:
+  case Hexagon::LDriubit:
+  case Hexagon::LDrib_ae:
+  case Hexagon::LDriub_ae:
+    return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+           (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+  case Hexagon::ADD_ri:
+  case Hexagon::TFR_FI:
+    return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+           (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+  case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4:
+  case Hexagon::MEMw_ADDi_indexed_MEM_V4:
+  case Hexagon::MEMw_SUBi_indexed_MEM_V4:
+  case Hexagon::MEMw_ADDr_indexed_MEM_V4:
+  case Hexagon::MEMw_SUBr_indexed_MEM_V4:
+  case Hexagon::MEMw_ANDr_indexed_MEM_V4:
+  case Hexagon::MEMw_ORr_indexed_MEM_V4:
+  case Hexagon::MEMw_ADDSUBi_MEM_V4:
+  case Hexagon::MEMw_ADDi_MEM_V4:
+  case Hexagon::MEMw_SUBi_MEM_V4:
+  case Hexagon::MEMw_ADDr_MEM_V4:
+  case Hexagon::MEMw_SUBr_MEM_V4:
+  case Hexagon::MEMw_ANDr_MEM_V4:
+  case Hexagon::MEMw_ORr_MEM_V4:
+    assert((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly.");
+    return (0 <= Offset && Offset <= 255);
+
+  case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4:
+  case Hexagon::MEMh_ADDi_indexed_MEM_V4:
+  case Hexagon::MEMh_SUBi_indexed_MEM_V4:
+  case Hexagon::MEMh_ADDr_indexed_MEM_V4:
+  case Hexagon::MEMh_SUBr_indexed_MEM_V4:
+  case Hexagon::MEMh_ANDr_indexed_MEM_V4:
+  case Hexagon::MEMh_ORr_indexed_MEM_V4:
+  case Hexagon::MEMh_ADDSUBi_MEM_V4:
+  case Hexagon::MEMh_ADDi_MEM_V4:
+  case Hexagon::MEMh_SUBi_MEM_V4:
+  case Hexagon::MEMh_ADDr_MEM_V4:
+  case Hexagon::MEMh_SUBr_MEM_V4:
+  case Hexagon::MEMh_ANDr_MEM_V4:
+  case Hexagon::MEMh_ORr_MEM_V4:
+    assert((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly.");
+    return (0 <= Offset && Offset <= 127);
+
+  case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4:
+  case Hexagon::MEMb_ADDi_indexed_MEM_V4:
+  case Hexagon::MEMb_SUBi_indexed_MEM_V4:
+  case Hexagon::MEMb_ADDr_indexed_MEM_V4:
+  case Hexagon::MEMb_SUBr_indexed_MEM_V4:
+  case Hexagon::MEMb_ANDr_indexed_MEM_V4:
+  case Hexagon::MEMb_ORr_indexed_MEM_V4:
+  case Hexagon::MEMb_ADDSUBi_MEM_V4:
+  case Hexagon::MEMb_ADDi_MEM_V4:
+  case Hexagon::MEMb_SUBi_MEM_V4:
+  case Hexagon::MEMb_ADDr_MEM_V4:
+  case Hexagon::MEMb_SUBr_MEM_V4:
+  case Hexagon::MEMb_ANDr_MEM_V4:
+  case Hexagon::MEMb_ORr_MEM_V4:
+    return (0 <= Offset && Offset <= 63);
+
+  // LDriw_pred and STriw_pred are pseudo operations; they have to accept
+  // offsets of any size. A later pass knows how to handle them.
+  case Hexagon::STriw_pred:
+  case Hexagon::LDriw_pred:
+    return true;
+
+  // INLINEASM is very special.
+  case Hexagon::INLINEASM:
+    return true;
+  }
+
+  assert(0 && "No offset range is defined for this opcode. "
+              "Please define it in the above switch statement!");
+  return false;
+}
+
+
+//
+// Check whether Offset is a valid auto-increment immediate for the given
+// load/store type.
+// +bool HexagonInstrInfo:: +isValidAutoIncImm(const EVT VT, const int Offset) const { + + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); + } + + assert(0 && "Not an auto-inc opc!"); + + return false; +} + + +bool HexagonInstrInfo:: +isMemOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) + { + case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDi_indexed_MEM_V4 : + case Hexagon::MEMw_SUBi_indexed_MEM_V4 : + case Hexagon::MEMw_ADDr_indexed_MEM_V4 : + case Hexagon::MEMw_SUBr_indexed_MEM_V4 : + case Hexagon::MEMw_ANDr_indexed_MEM_V4 : + case Hexagon::MEMw_ORr_indexed_MEM_V4 : + case Hexagon::MEMw_ADDSUBi_MEM_V4 : + case Hexagon::MEMw_ADDi_MEM_V4 : + case Hexagon::MEMw_SUBi_MEM_V4 : + case Hexagon::MEMw_ADDr_MEM_V4 : + case Hexagon::MEMw_SUBr_MEM_V4 : + case Hexagon::MEMw_ANDr_MEM_V4 : + case Hexagon::MEMw_ORr_MEM_V4 : + case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDi_indexed_MEM_V4 : + case Hexagon::MEMh_SUBi_indexed_MEM_V4 : + case Hexagon::MEMh_ADDr_indexed_MEM_V4 : + case Hexagon::MEMh_SUBr_indexed_MEM_V4 : + case Hexagon::MEMh_ANDr_indexed_MEM_V4 : + case Hexagon::MEMh_ORr_indexed_MEM_V4 : + case Hexagon::MEMh_ADDSUBi_MEM_V4 : + case Hexagon::MEMh_ADDi_MEM_V4 : + case Hexagon::MEMh_SUBi_MEM_V4 : + case Hexagon::MEMh_ADDr_MEM_V4 : + case Hexagon::MEMh_SUBr_MEM_V4 : + case Hexagon::MEMh_ANDr_MEM_V4 : + case Hexagon::MEMh_ORr_MEM_V4 : + case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDi_indexed_MEM_V4 : + case Hexagon::MEMb_SUBi_indexed_MEM_V4 : + case Hexagon::MEMb_ADDr_indexed_MEM_V4 : + case Hexagon::MEMb_SUBr_indexed_MEM_V4 : + case Hexagon::MEMb_ANDr_indexed_MEM_V4 : + case Hexagon::MEMb_ORr_indexed_MEM_V4 : + case Hexagon::MEMb_ADDSUBi_MEM_V4 : + case Hexagon::MEMb_ADDi_MEM_V4 : + case Hexagon::MEMb_SUBi_MEM_V4 : + case Hexagon::MEMb_ADDr_MEM_V4 : + case Hexagon::MEMb_SUBr_MEM_V4 : + case Hexagon::MEMb_ANDr_MEM_V4 : + case Hexagon::MEMb_ORr_MEM_V4 : + return true; + } + return false; +} + + +bool HexagonInstrInfo:: +isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) + { + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + } + return false; +} + + +bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + switch (MI->getOpcode()) + { + case Hexagon::ADD_ri_cPt: + case Hexagon::ADD_ri_cNotPt: + case Hexagon::ADD_rr_cPt: + case Hexagon::ADD_rr_cNotPt: + case Hexagon::XOR_rr_cPt: + case Hexagon::XOR_rr_cNotPt: + case Hexagon::AND_rr_cPt: + case Hexagon::AND_rr_cNotPt: + case Hexagon::OR_rr_cPt: + case Hexagon::OR_rr_cNotPt: + case Hexagon::SUB_rr_cPt: + case Hexagon::SUB_rr_cNotPt: + case Hexagon::COMBINE_rr_cPt: + case Hexagon::COMBINE_rr_cNotPt: + return true; + case Hexagon::ASLH_cPt_V4: + case Hexagon::ASLH_cNotPt_V4: + case Hexagon::ASRH_cPt_V4: + case Hexagon::ASRH_cNotPt_V4: + case Hexagon::SXTB_cPt_V4: + case Hexagon::SXTB_cNotPt_V4: + case Hexagon::SXTH_cPt_V4: + case Hexagon::SXTH_cNotPt_V4: + case Hexagon::ZXTB_cPt_V4: + 
case Hexagon::ZXTB_cNotPt_V4: + case Hexagon::ZXTH_cPt_V4: + case Hexagon::ZXTH_cNotPt_V4: + return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + + default: + return false; + } + return false; +} + + +bool HexagonInstrInfo:: +isConditionalLoad (const MachineInstr* MI) const { + const HexagonRegisterInfo& QRI = getRegisterInfo(); + switch (MI->getOpcode()) + { + case Hexagon::LDrid_cPt : + case Hexagon::LDrid_cNotPt : + case Hexagon::LDrid_indexed_cPt : + case Hexagon::LDrid_indexed_cNotPt : + case Hexagon::LDriw_cPt : + case Hexagon::LDriw_cNotPt : + case Hexagon::LDriw_indexed_cPt : + case Hexagon::LDriw_indexed_cNotPt : + case Hexagon::LDrih_cPt : + case Hexagon::LDrih_cNotPt : + case Hexagon::LDrih_indexed_cPt : + case Hexagon::LDrih_indexed_cNotPt : + case Hexagon::LDrib_cPt : + case Hexagon::LDrib_cNotPt : + case Hexagon::LDrib_indexed_cPt : + case Hexagon::LDrib_indexed_cNotPt : + case Hexagon::LDriuh_cPt : + case Hexagon::LDriuh_cNotPt : + case Hexagon::LDriuh_indexed_cPt : + case Hexagon::LDriuh_indexed_cNotPt : + case Hexagon::LDriub_cPt : + case Hexagon::LDriub_cNotPt : + case Hexagon::LDriub_indexed_cPt : + case Hexagon::LDriub_indexed_cNotPt : + return true; + case Hexagon::POST_LDrid_cPt : + case Hexagon::POST_LDrid_cNotPt : + case Hexagon::POST_LDriw_cPt : + case Hexagon::POST_LDriw_cNotPt : + case Hexagon::POST_LDrih_cPt : + case Hexagon::POST_LDrih_cNotPt : + case Hexagon::POST_LDrib_cPt : + case Hexagon::POST_LDrib_cNotPt : + case Hexagon::POST_LDriuh_cPt : + case Hexagon::POST_LDriuh_cNotPt : + case Hexagon::POST_LDriub_cPt : + case Hexagon::POST_LDriub_cNotPt : + return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + case Hexagon::LDrid_indexed_cPt_V4 : + case Hexagon::LDrid_indexed_cNotPt_V4 : + case Hexagon::LDrid_indexed_shl_cPt_V4 : + case Hexagon::LDrid_indexed_shl_cNotPt_V4 : + case Hexagon::LDrib_indexed_cPt_V4 : + case Hexagon::LDrib_indexed_cNotPt_V4 : + case Hexagon::LDrib_indexed_shl_cPt_V4 : + case Hexagon::LDrib_indexed_shl_cNotPt_V4 : + case Hexagon::LDriub_indexed_cPt_V4 : + case Hexagon::LDriub_indexed_cNotPt_V4 : + case Hexagon::LDriub_indexed_shl_cPt_V4 : + case Hexagon::LDriub_indexed_shl_cNotPt_V4 : + case Hexagon::LDrih_indexed_cPt_V4 : + case Hexagon::LDrih_indexed_cNotPt_V4 : + case Hexagon::LDrih_indexed_shl_cPt_V4 : + case Hexagon::LDrih_indexed_shl_cNotPt_V4 : + case Hexagon::LDriuh_indexed_cPt_V4 : + case Hexagon::LDriuh_indexed_cNotPt_V4 : + case Hexagon::LDriuh_indexed_shl_cPt_V4 : + case Hexagon::LDriuh_indexed_shl_cNotPt_V4 : + case Hexagon::LDriw_indexed_cPt_V4 : + case Hexagon::LDriw_indexed_cNotPt_V4 : + case Hexagon::LDriw_indexed_shl_cPt_V4 : + case Hexagon::LDriw_indexed_shl_cNotPt_V4 : + return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4; + default: + return false; + } + return false; +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h new file mode 100644 index 0000000..d549c46 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -0,0 +1,166 @@ +//=- HexagonInstrInfo.h - Hexagon Instruction Information ---------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HexagonINSTRUCTIONINFO_H +#define HexagonINSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "HexagonRegisterInfo.h" + + +#define GET_INSTRINFO_HEADER +#include "HexagonGenInstrInfo.inc" + +namespace llvm { + +class HexagonInstrInfo : public HexagonGenInstrInfo { + const HexagonRegisterInfo RI; + const HexagonSubtarget& Subtarget; +public: + explicit HexagonInstrInfo(HexagonSubtarget &ST); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + + virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + unsigned createVR(MachineFunction* 
+
+  virtual bool isPredicable(MachineInstr *MI) const;
+  virtual bool
+  PredicateInstruction(MachineInstr *MI,
+                       const SmallVectorImpl<MachineOperand> &Cond) const;
+
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                                   unsigned ExtraPredCycles,
+                                   const BranchProbability &Probability) const;
+
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                                   unsigned NumTCycles, unsigned ExtraTCycles,
+                                   MachineBasicBlock &FMBB,
+                                   unsigned NumFCycles, unsigned ExtraFCycles,
+                                   const BranchProbability &Probability) const;
+
+  virtual bool isPredicated(const MachineInstr *MI) const;
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+  virtual bool
+  SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                    const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool
+  ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  virtual bool
+  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                            const BranchProbability &Probability) const;
+
+  bool isValidOffset(const int Opcode, const int Offset) const;
+  bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+  bool isMemOp(const MachineInstr *MI) const;
+  bool isSpillPredRegOp(const MachineInstr *MI) const;
+  bool isU6_3Immediate(const int value) const;
+  bool isU6_2Immediate(const int value) const;
+  bool isU6_1Immediate(const int value) const;
+  bool isU6_0Immediate(const int value) const;
+  bool isS4_3Immediate(const int value) const;
+  bool isS4_2Immediate(const int value) const;
+  bool isS4_1Immediate(const int value) const;
+  bool isS4_0Immediate(const int value) const;
+  bool isS12_Immediate(const int value) const;
+  bool isU6_Immediate(const int value) const;
+  bool isS8_Immediate(const int value) const;
+  bool isS6_Immediate(const int value) const;
+
+  bool isConditionalALU32(const MachineInstr *MI) const;
+  bool isConditionalLoad(const MachineInstr *MI) const;
+  bool isDeallocRet(const MachineInstr *MI) const;
+
+private:
+  int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 0000000..cc508b7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,3014 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonImmediates.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+
+// Address operands.
+def MEMrr : Operand<i32> {
+  let PrintMethod = "printHexagonMEMrrOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+  let PrintMethod = "printHexagonMEMriOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+                  ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+  let PrintMethod = "printHexagonMEMriOperand";
+  let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+  let PrintMethod = "printHexagonFrameIndexOperand";
+  let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in
+  def globaladdress : Operand<i32>;
+
+let PrintMethod = "printJumpTable" in
+  def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+  let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+  let PrintMethod = "printSymbolLo";
+}
+
+// Multi-class for logical operators.
+multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
+  def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                    !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+                    [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+  def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+                    !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
+                    [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>;
+}
+
+// Multi-class for compare ops.
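+// For illustration: instantiating one of the compare multi-classes below,
+// e.g. the "defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;" used later in
+// this file, yields both a register form CMPEQ_rr ("$dst = cmp.eq($b, $c)")
+// and an immediate form CMPEQ_ri ("$dst = cmp.eq($b, #$c)"), each writing a
+// predicate register.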
+let isCompare = 1 in { +multiclass CMP64_rr<string OpcStr, PatFrag OpNode> { + def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>; +} +multiclass CMP32_rr<string OpcStr, PatFrag OpNode> { + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; +} + +multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> { + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>; +} + +multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> { + def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>; +} + +multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> { + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>; +} + +multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> { + def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c), + !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")), + [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>; +} +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html +// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU32/ALU + +//===----------------------------------------------------------------------===// +// Add. +let isPredicable = 1 in +def ADD_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = add($src1, $src2)", + [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>; + +let isPredicable = 1 in +def ADD_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s16Imm:$src2), + "$dst = add($src1, #$src2)", + [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>; + +// Logical operations. 
+let isPredicable = 1 in +def XOR_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = xor($src1, $src2)", + [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>; + +let isPredicable = 1 in +def AND_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = and($src1, $src2)", + [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>; + +def OR_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s8Imm:$src2), + "$dst = or($src1, #$src2)", + [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>; + +def NOT_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1), + "$dst = not($src1)", + [(set IntRegs:$dst, (not IntRegs:$src1))]>; + +def AND_ri : ALU32_ri<(outs IntRegs:$dst), + (ins IntRegs:$src1, s10Imm:$src2), + "$dst = and($src1, #$src2)", + [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>; + +let isPredicable = 1 in +def OR_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = or($src1, $src2)", + [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>; + +// Negate. +def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = neg($src1)", + [(set IntRegs:$dst, (ineg IntRegs:$src1))]>; +// Nop. +let neverHasSideEffects = 1 in +def NOP : ALU32_rr<(outs), (ins), + "nop", + []>; + +// Subtract. +let isPredicable = 1 in +def SUB_rr : ALU32_rr<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = sub($src1, $src2)", + [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>; + +// Transfer immediate. +let isReMaterializable = 1, isPredicable = 1 in +def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1), + "$dst = #$src1", + [(set IntRegs:$dst, s16ImmPred:$src1)]>; + +// Transfer register. +let neverHasSideEffects = 1, isPredicable = 1 in +def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + []>; + +// Transfer control register. +let neverHasSideEffects = 1 in +def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + []>; +//===----------------------------------------------------------------------===// +// ALU32/ALU - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// ALU32/PERM + +//===----------------------------------------------------------------------===// + +// Combine. +let isPredicable = 1, neverHasSideEffects = 1 in +def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = combine($src1, $src2)", + []>; + +// Mux. 
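+// A mux selects between its last two operands under the predicate: the
+// (select ...) patterns below return the second operand when $src1 is true
+// and the third otherwise, so e.g. "r0 = mux(p0, r1, #0)" (registers chosen
+// purely for illustration) yields r1 when p0 is true and 0 when it is false.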
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+                           DoubleRegs:$src2,
+                           DoubleRegs:$src3),
+                 "$dst = vmux($src1, $src2, $src3)",
+                 []>;
+
+def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+                       IntRegs:$src2, IntRegs:$src3),
+             "$dst = mux($src1, $src2, $src3)",
+             [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+                                         IntRegs:$src3))]>;
+
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+                       IntRegs:$src3),
+             "$dst = mux($src1, #$src2, $src3)",
+             [(set IntRegs:$dst, (select PredRegs:$src1,
+                                         s8ImmPred:$src2, IntRegs:$src3))]>;
+
+def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
+                       s8Imm:$src3),
+             "$dst = mux($src1, $src2, #$src3)",
+             [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+                                         s8ImmPred:$src3))]>;
+
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+                       s8Imm:$src3),
+             "$dst = mux($src1, #$src2, #$src3)",
+             [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2,
+                                         s8ImmPred:$src3))]>;
+
+// Shift halfword.
+let isPredicable = 1 in
+def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = aslh($src1)",
+           [(set IntRegs:$dst, (shl IntRegs:$src1, (i32 16)))]>;
+
+let isPredicable = 1 in
+def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = asrh($src1)",
+           [(set IntRegs:$dst, (sra IntRegs:$src1, (i32 16)))]>;
+
+// Sign extend.
+let isPredicable = 1 in
+def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = sxtb($src1)",
+           [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>;
+
+let isPredicable = 1 in
+def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = sxth($src1)",
+           [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>;
+
+// Zero extend.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = zxtb($src1)",
+           []>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+           "$dst = zxth($src1)",
+           []>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+
+// Conditional add.
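+// Naming convention for the predicated variants below: _cPt executes when
+// the predicate is true, _cNotPt when it is false, and the _cdn* forms
+// condition on the dot-new predicate ("if ($src1.new) ..."), i.e. a
+// predicate produced earlier in the same packet.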
+let neverHasSideEffects = 1 in +def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if ($src1) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if (!$src1) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if ($src1.new) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3), + "if (!$src1.new) $dst = add($src2, #$src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = add($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = add($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = add($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = add($src2, $src3)", + []>; + + +// Conditional combine. + +let neverHasSideEffects = 1 in +def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = combine($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = combine($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = combine($src2, $src3)", + []>; + +let neverHasSideEffects = 1 in +def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = combine($src2, $src3)", + []>; + +// Conditional logical operations. 
+ +def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = xor($src2, $src3)", + []>; + +def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = xor($src2, $src3)", + []>; + +def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = xor($src2, $src3)", + []>; + +def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = xor($src2, $src3)", + []>; + +def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = and($src2, $src3)", + []>; + +def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = and($src2, $src3)", + []>; + +def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = and($src2, $src3)", + []>; + +def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = and($src2, $src3)", + []>; + +def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = or($src2, $src3)", + []>; + +def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = or($src2, $src3)", + []>; + +def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = or($src2, $src3)", + []>; + +def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = or($src2, $src3)", + []>; + + +// Conditional subtract. + +def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst = sub($src2, $src3)", + []>; + +def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst = sub($src2, $src3)", + []>; + +def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst = sub($src2, $src3)", + []>; + +def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst = sub($src2, $src3)", + []>; + + +// Conditional transfer. 
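+// A predicated transfer conditionally copies a register or a signed 12-bit
+// immediate (s12Imm), e.g. "if (!p0) r0 = #-1" (example registers only); the
+// dot-new forms again condition on a predicate generated in the same packet.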
+
+let neverHasSideEffects = 1 in
+def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
+              "if ($src1) $dst = $src2",
+              []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+                                                IntRegs:$src2),
+                 "if (!$src1) $dst = $src2",
+                 []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
+               "if ($src1) $dst = #$src2",
+               []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+                                                 s12Imm:$src2),
+                  "if (!$src1) $dst = #$src2",
+                  []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+                                               IntRegs:$src2),
+                "if ($src1.new) $dst = $src2",
+                []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+                                                  IntRegs:$src2),
+                   "if (!$src1.new) $dst = $src2",
+                   []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+                                                s12Imm:$src2),
+                 "if ($src1.new) $dst = #$src2",
+                 []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+                                                   s12Imm:$src2),
+                    "if (!$src1.new) $dst = #$src2",
+                    []>;
+
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/VH +
+//===----------------------------------------------------------------------===//
+// Vector add halfwords
+
+// Vector average halfwords
+
+// Vector subtract halfwords
+//===----------------------------------------------------------------------===//
+// ALU32/VH -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+                                                 DoubleRegs:$src2),
+               "$dst = add($src1, $src2)",
+               [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+                                           DoubleRegs:$src2))]>;
+
+// Add halfword.
+
+// Compare.
+defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
+defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
+defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
+
+// Logical operations.
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+                                                 DoubleRegs:$src2),
+               "$dst = and($src1, $src2)",
+               [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+                                           DoubleRegs:$src2))]>;
+
+def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+                                                DoubleRegs:$src2),
+              "$dst = or($src1, $src2)",
+              [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+                                                 DoubleRegs:$src2),
+               "$dst = xor($src1, $src2)",
+               [(set DoubleRegs:$dst, (xor DoubleRegs:$src1,
+                                           DoubleRegs:$src2))]>;
+
+// Maximum.
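+// There is no dedicated max/min node here; the patterns below express max
+// as a select on a compare, i.e. "$dst = max($src2, $src1)" matches
+// (select (setlt $src2, $src1), $src1, $src2), and min does the same with
+// setgt.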
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = max($src2, $src1)", + [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2, + IntRegs:$src1)), + IntRegs:$src1, IntRegs:$src2))]>; + +// Minimum. +def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = min($src2, $src1)", + [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2, + IntRegs:$src1)), + IntRegs:$src1, IntRegs:$src2))]>; + +// Subtract. +def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = sub($src1, $src2)", + [(set DoubleRegs:$dst, (sub DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +// Subtract halfword. + +// Transfer register. +let neverHasSideEffects = 1 in +def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + "$dst = $src1", + []>; +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/BIT + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/BIT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/PERM + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/PERM - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/VB + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/VB - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/VH + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/VW + +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// ALU64/VW - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CR + +//===----------------------------------------------------------------------===// +// Logical reductions on predicates. + +// Looping instructions. + +// Pipelined looping instructions. + +// Logical operations on predicates. 
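+// Predicate registers hold eight bits (one per vector-compare lane); besides
+// the bitwise and/or/xor/not forms below, any8 reduces a predicate to "some
+// bit set" and all8 to "all bits set".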
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = and($src1, $src2)", + [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>; + +let neverHasSideEffects = 1 in +def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, + PredRegs:$src2), + "$dst = and($src1, !$src2)", + []>; + +def NOT_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = not($src1)", + [(set PredRegs:$dst, (not PredRegs:$src1))]>; + +def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = any8($src1)", + []>; + +def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = all8($src1)", + []>; + +def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1, + PredRegs:$src2), + "$dst = vitpack($src1, $src2)", + []>; + +def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2, + PredRegs:$src3), + "$dst = valignb($src1, $src2, $src3)", + []>; + +def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2, + PredRegs:$src3), + "$dst = vspliceb($src1, $src2, $src3)", + []>; + +def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1), + "$dst = mask($src1)", + []>; + +def NOT_Ps : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1), + "$dst = not($src1)", + [(set PredRegs:$dst, (not PredRegs:$src1))]>; + +def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = or($src1, $src2)", + [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>; + +def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2), + "$dst = xor($src1, $src2)", + [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>; + + +// User control register transfer. +//===----------------------------------------------------------------------===// +// CR - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// J + +//===----------------------------------------------------------------------===// +// Jump to address. +let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in { + def JMP : JInst< (outs), + (ins brtarget:$offset), + "jump $offset", + [(br bb:$offset)]>; +} + +// if (p0) jump +let isBranch = 1, isTerminator=1, Defs = [PC] in { + def JMP_Pred : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if ($src) jump $offset", + [(brcond PredRegs:$src, bb:$offset)]>; +} + +// if (!p0) jump +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredNot : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if (!$src) jump $offset", + []>; +} + +let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC] in { + def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst), + "if ($pred) jump $dst", + []>; +} + +// Jump to address conditioned on new predicate. +// if (p0) jump:t +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredPt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if ($src.new) jump:t $offset", + []>; +} + +// if (!p0) jump:t +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredNotPt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if (!$src.new) jump:t $offset", + []>; +} + +// Not taken. 
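+// The :t / :nt suffixes on the dot-new jumps are static branch-prediction
+// hints (taken / not taken); the two defs below are the not-taken-hint
+// counterparts of JMP_PredPt and JMP_PredNotPt above.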
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredPnt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if ($src.new) jump:nt $offset", + []>; +} + +// Not taken. +let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in { + def JMP_PredNotPnt : JInst< (outs), + (ins PredRegs:$src, brtarget:$offset), + "if (!$src.new) jump:nt $offset", + []>; +} +//===----------------------------------------------------------------------===// +// J - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +// Jump to address from register. +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR: JRInst<(outs), (ins), + "jumpr r31", + [(retflag)]>; +} + +// Jump to address from register. +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1), + "if ($src1) jumpr r31", + []>; +} + +// Jump to address from register. +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1), + "if (!$src1) jumpr r31", + []>; +} + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// LD + +//===----------------------------------------------------------------------===// +/// +/// Make sure that in post increment load, the first operand is always the post +/// increment operand. +/// +// Load doubleword. +let isPredicable = 1 in +def LDrid : LDInst<(outs DoubleRegs:$dst), + (ins MEMri:$addr), + "$dst = memd($addr)", + [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>; + +let isPredicable = 1, AddedComplexity = 20 in +def LDrid_indexed : LDInst<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, s11_3Imm:$offset), + "$dst=memd($src1+#$offset)", + [(set DoubleRegs:$dst, (load (add IntRegs:$src1, + s11_3ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_GP : LDInst<(outs DoubleRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memd(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDd_GP : LDInst<(outs DoubleRegs:$dst), + (ins globaladdress:$global), + "$dst=memd(#$global)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memd($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load doubleword conditionally. 
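+// The POST_LD* forms update the base register in place: the constraint
+// "$src2 = $dst2" ties the written-back base to the incoming base, so e.g.
+// "if (p0) r1:0 = memd(r2++#8)" (registers purely illustrative) both loads
+// and advances r2 by 8 in a single instruction.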
+let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memd($addr)", + []>; + + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memd($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if ($src1) $dst=memd($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if (!$src1) $dst=memd($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), + "if ($src1) $dst1 = memd($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3), + "if (!$src1) $dst1 = memd($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memd($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memd($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if ($src1.new) $dst=memd($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3), + "if (!$src1.new) $dst=memd($src2+#$src3)", + []>; + + +// Load byte. +let isPredicable = 1 in +def LDrib : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memb($addr)", + [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>; + +def LDrib_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memb($addr)", + [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>; + +// Indexed load byte. +let isPredicable = 1, AddedComplexity = 20 in +def LDrib_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memb($src1+#$offset)", + [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + + +// Indexed load byte any-extend. 
+let AddedComplexity = 20 in +def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memb($src1+#$offset)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memb(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDb_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memb(#$global)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDub_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memub(#$global)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memb($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load byte conditionally. +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1) $dst = memb($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1) $dst = memb($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if ($src1) $dst1 = memb($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if (!$src1) $dst1 = memb($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memb($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1.new) $dst = memb($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1.new) $dst = memb($src2+#$src3)", + []>; + + +// Load halfword. 
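+// Extension discipline for the narrow loads: memb sign-extends (sextloadi8)
+// and memub zero-extends (zextloadi8); likewise memh/memuh below for
+// halfwords. The _ae variants match the anyext loads, where either
+// extension is acceptable.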
+let isPredicable = 1 in +def LDrih : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memh($addr)", + [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>; + +let isPredicable = 1, AddedComplexity = 20 in +def LDrih_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memh($src1+#$offset)", + [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))] >; + +def LDrih_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memh($addr)", + [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>; + +let AddedComplexity = 20 in +def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memh($src1+#$offset)", + [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))] >; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memh(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDh_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memh(#$global)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDuh_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memuh(#$global)", + []>; + + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memh($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load halfword conditionally. +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1) $dst = memh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if (!$src1) $dst = memh($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if ($src1) $dst1 = memh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if (!$src1) $dst1 = memh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1.new) $dst = memh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, 
u6_1Imm:$src3), + "if (!$src1.new) $dst = memh($src2+#$src3)", + []>; + +// Load unsigned byte. +let isPredicable = 1 in +def LDriub : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memub($addr)", + [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>; + +let isPredicable = 1 in +def LDriubit : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memub($addr)", + [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>; + +let isPredicable = 1, AddedComplexity = 20 in +def LDriub_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memub($src1+#$offset)", + [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +let AddedComplexity = 20 in +def LDriubit_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memub($src1+#$offset)", + [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +def LDriub_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memub($addr)", + [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>; + + +let AddedComplexity = 20 in +def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_0Imm:$offset), + "$dst=memub($src1+#$offset)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + s11_0ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memub(#$global+$offset)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memub($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load unsigned byte conditionally. 
+let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1) $dst = memub($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1) $dst = memub($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if ($src1) $dst1 = memub($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3), + "if (!$src1) $dst1 = memub($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memub($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if ($src1.new) $dst = memub($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3), + "if (!$src1.new) $dst = memub($src2+#$src3)", + []>; + +// Load unsigned halfword. +let isPredicable = 1 in +def LDriuh : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memuh($addr)", + [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>; + +// Indexed load unsigned halfword. +let isPredicable = 1, AddedComplexity = 20 in +def LDriuh_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memuh($src1+#$offset)", + [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))]>; + +def LDriuh_ae : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), + "$dst = memuh($addr)", + [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>; + + +// Indexed load unsigned halfword any-extend. +let AddedComplexity = 20 in +def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_1Imm:$offset), + "$dst=memuh($src1+#$offset)", + [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, + s11_1ImmPred:$offset)))] >; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memuh(#$global+$offset)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memuh($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load unsigned halfword conditionally. 
+let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1) $dst = memuh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if (!$src1) $dst = memuh($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if ($src1) $dst1 = memuh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3), + "if (!$src1) $dst1 = memuh($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memuh($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if ($src1.new) $dst = memuh($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3), + "if (!$src1.new) $dst = memuh($src2+#$src3)", + []>; + + +// Load word. +let isPredicable = 1 in +def LDriw : LDInst<(outs IntRegs:$dst), + (ins MEMri:$addr), "$dst = memw($addr)", + [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>; + +// Load predicate. +let mayLoad = 1, Defs = [R10,R11] in +def LDriw_pred : LDInst<(outs PredRegs:$dst), + (ins MEMri:$addr), + "Error; should not emit", + []>; + +// Indexed load. +let isPredicable = 1, AddedComplexity = 20 in +def LDriw_indexed : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, s11_2Imm:$offset), + "$dst=memw($src1+#$offset)", + [(set IntRegs:$dst, (load (add IntRegs:$src1, + s11_2ImmPred:$offset)))]>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global, u16Imm:$offset), + "$dst=memw(#$global+$offset)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDw_GP : LDInst<(outs IntRegs:$dst), + (ins globaladdress:$global), + "$dst=memw(#$global)", + []>; + +let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2), + (ins IntRegs:$src1, s4Imm:$offset), + "$dst = memw($src1++#$offset)", + [], + "$src1 = $dst2">; + +// Load word conditionally. 
+ +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if ($src1) $dst=memw($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if (!$src1) $dst=memw($src2+#$src3)", + []>; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), + "if ($src1) $dst1 = memw($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in +def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2), + (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3), + "if (!$src1) $dst1 = memw($src2++#$src3)", + [], + "$src2 = $dst2">; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if ($src1.new) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, MEMri:$addr), + "if (!$src1.new) $dst = memw($addr)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if ($src1.new) $dst=memw($src2+#$src3)", + []>; + +let mayLoad = 1, neverHasSideEffects = 1 in +def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3), + "if (!$src1.new) $dst=memw($src2+#$src3)", + []>; + +// Deallocate stack frame. +let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { + def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1), + "deallocframe", + []>; +} + +// Load and unpack bytes to halfwords. +//===----------------------------------------------------------------------===// +// LD - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/ALU + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/ALU - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// MTYPE/COMPLEX - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MTYPE/MPYH + +//===----------------------------------------------------------------------===// +// Multiply and use lower result. 
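+// mpyi returns the low 32 bits of the 32x32 product (plain mul), mpy/mpyu
+// return the high 32 bits (mulhs/mulhu), and the 64-bit forms further down
+// produce the full product into a double register.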
+// Rd=+mpyi(Rs,#u8)
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+               "$dst =+ mpyi($src1, #$src2)",
+               [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>;
+
+// Rd=-mpyi(Rs,#u8)
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+               "$dst =- mpyi($src1, #$src2)",
+               [(set IntRegs:$dst,
+                     (mul IntRegs:$src1, n8ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is NOT the same as m9 - but it works... so far.
+// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
+// depending on the value of m9. See Arch Spec.
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+              "$dst = mpyi($src1, #$src2)",
+              [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,Rt)
+def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+           "$dst = mpyi($src1, $src2)",
+           [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rx+=mpyi(Rs,#u8)
+def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
+                  (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+                  "$dst += mpyi($src2, #$src3)",
+                  [(set IntRegs:$dst,
+                        (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))],
+                  "$src1 = $dst">;
+
+// Rx+=mpyi(Rs,Rt)
+def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+                  (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+                  "$dst += mpyi($src2, $src3)",
+                  [(set IntRegs:$dst,
+                        (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))],
+                  "$src1 = $dst">;
+
+// Rx-=mpyi(Rs,#u8)
+def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
+                  (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+                  "$dst -= mpyi($src2, #$src3)",
+                  [(set IntRegs:$dst,
+                        (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))],
+                  "$src1 = $dst">;
+
+// Multiply and use upper result.
+// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
+// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
+// Rd=mpy(Rs,Rt)
+def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+          "$dst = mpy($src1, $src2)",
+          [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rd=mpy(Rs,Rt):rnd
+// Rd=mpyu(Rs,Rt)
+def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+           "$dst = mpyu($src1, $src2)",
+           [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>;
+
+// Multiply and use full result.
+// Rdd=mpyu(Rs,Rt)
+def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+             "$dst = mpyu($src1, $src2)",
+             [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
+                                         (i64 (anyext IntRegs:$src2))))]>;
+
+// Rdd=mpy(Rs,Rt)
+def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+            "$dst = mpy($src1, $src2)",
+            [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
+                                        (i64 (sext IntRegs:$src2))))]>;
+
+
+// Multiply and accumulate, use full result.
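+// The MInst_acc accumulate forms tie the accumulator input to the output via
+// the "$src1 = $dst" constraint, making Rxx a read-modify-write operand,
+// e.g. "r1:0 += mpy(r2, r3)" (registers chosen only for illustration).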
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+                (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+                "$dst += mpy($src2, $src3)",
+                [(set DoubleRegs:$dst,
+                      (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
+                           DoubleRegs:$src1))],
+                "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+                (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+                "$dst -= mpy($src2, $src3)",
+                [(set DoubleRegs:$dst,
+                      (sub DoubleRegs:$src1,
+                           (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+                "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+                 IntRegs:$src2, IntRegs:$src3),
+                 "$dst += mpyu($src2, $src3)",
+                 [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
+                                                  (i64 (anyext IntRegs:$src3))),
+                                             DoubleRegs:$src1))], "$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+                 (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+                 "$dst -= mpyu($src2, $src3)",
+                 [(set DoubleRegs:$dst,
+                       (sub DoubleRegs:$src1,
+                            (mul (i64 (anyext IntRegs:$src2)),
+                                 (i64 (anyext IntRegs:$src3)))))],
+                 "$src1 = $dst">;
+
+
+def ADDrr_acc : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+                IntRegs:$src2, IntRegs:$src3),
+                "$dst += add($src2, $src3)",
+                [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
+                                         IntRegs:$src1))],
+                "$src1 = $dst">;
+
+def ADDri_acc : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+                IntRegs:$src2, s8Imm:$src3),
+                "$dst += add($src2, #$src3)",
+                [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
+                                         IntRegs:$src1))],
+                "$src1 = $dst">;
+
+def SUBrr_acc : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+                IntRegs:$src2, IntRegs:$src3),
+                "$dst -= add($src2, $src3)",
+                [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
+                                                             IntRegs:$src3)))],
+                "$src1 = $dst">;
+
+def SUBri_acc : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+                IntRegs:$src2, s8Imm:$src3),
+                "$dst -= add($src2, #$src3)",
+                [(set IntRegs:$dst, (sub IntRegs:$src1,
+                                         (add IntRegs:$src2, s8ImmPred:$src3)))],
+                "$src1 = $dst">;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ******
+/// 1. Make sure that in a post-increment store, the zeroth operand is always
+///    the post-increment operand.
+/// 2. Make sure that the store value operand (Rt/Rtt) in a store is always
+///    the last operand.
+///
+// Store doubleword.
+let isPredicable = 1 in
+def STrid : STInst<(outs),
+            (ins MEMri:$addr, DoubleRegs:$src1),
+            "memd($addr) = $src1",
+            [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>;
+
+// Indexed store double word.
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed : STInst<(outs),
+            (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+            "memd($src1+#$src2) = $src3",
+            [(store DoubleRegs:$src3,
+                    (add IntRegs:$src1, s11_3ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP : STInst<(outs),
+            (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+            "memd(#$global+$offset) = $src",
+            []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STdri : STInstPI<(outs IntRegs:$dst),
+            (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+            "memd($src2++#$offset) = $src1",
+            [(set IntRegs:$dst,
+              (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))],
+            "$src2 = $dst">;
+
+// Store doubleword conditionally.
+// if ([!]Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cPt : STInst<(outs),
+            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+            "if ($src1) memd($addr) = $src2",
+            []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cNotPt : STInst<(outs),
+            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+            "if (!$src1) memd($addr) = $src2",
+            []>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cPt : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+                 DoubleRegs:$src4),
+            "if ($src1) memd($src2+#$src3) = $src4",
+            []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cNotPt : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+                 DoubleRegs:$src4),
+            "if (!$src1) memd($src2+#$src3) = $src4",
+            []>;
+
+// if ([!]Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+                 s4_3Imm:$offset),
+            "if ($src1) memd($src3++#$offset) = $src2",
+            [],
+            "$src3 = $dst">;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+                 s4_3Imm:$offset),
+            "if (!$src1) memd($src3++#$offset) = $src2",
+            [],
+            "$src3 = $dst">;
+
+
+// Store byte.
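+//
+// (An illustrative note, commentary rather than part of the instruction
+// definitions: the "#s11:N" notation denotes a signed 11-bit immediate scaled
+// by 2^N. Byte stores therefore reach offsets in [-1024, 1023], while the
+// doubleword stores above, with #s11:3, reach 8-byte-aligned offsets in
+// [-8192, 8184]. For example:
+//   memb(r2+#-5) = r3     // encodable, any byte offset in range
+//   memd(r29+#-16) = r1:0 // encodable, offset is a multiple of 8
+// Offsets outside these ranges must be materialized into a register first.)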
+// memb(Rs+#s11:0)=Rt +let isPredicable = 1 in +def STrib : STInst<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memb($addr) = $src1", + [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>; + +let AddedComplexity = 10, isPredicable = 1 in +def STrib_indexed : STInst<(outs), + (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3), + "memb($src1+#$src2) = $src3", + [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1, + s11_0ImmPred:$src2))]>; + +// memb(gp+#u16:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP : STInst<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memb(#$global+$offset) = $src", + []>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STb_GP : STInst<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memb(#$global) = $src", + []>; + +// memb(Rx++#s4:0)=Rt +let hasCtrlDep = 1, isPredicable = 1 in +def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1, + IntRegs:$src2, + s4Imm:$offset), + "memb($src2++#$offset) = $src1", + [(set IntRegs:$dst, + (post_truncsti8 IntRegs:$src1, IntRegs:$src2, + s4_0ImmPred:$offset))], + "$src2 = $dst">; + +// Store byte conditionally. +// if ([!]Pv) memb(Rs+#u6:0)=Rt +// if (Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memb($addr) = $src2", + []>; + +// if (!Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cNotPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memb($addr) = $src2", + []>; + +// if (Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1) memb($src2+#$src3) = $src4", + []>; + +// if (!Pv) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cNotPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1) memb($src2+#$src3) = $src4", + []>; + +// if ([!]Pv) memb(Rx++#s4:0)=Rt +// if (Pv) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">; + +// if (!Pv) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">; + + +// Store halfword. 
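+//
+// (Sketch of the post-increment convention used by the POST_ST* forms in this
+// section: "memh(Rx++#s4:1) = Rt" first stores Rt at [Rx], then advances Rx
+// by the scaled offset, e.g.
+//   memh(r1++#2) = r3   // store to [r1], then r1 += 2
+// In the TableGen definitions the constraint string "$src2 = $dst" ties the
+// written-back base register to its input operand, so both name the same
+// physical register.)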
+// memh(Rs+#s11:1)=Rt +let isPredicable = 1 in +def STrih : STInst<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memh($addr) = $src1", + [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>; + + +let AddedComplexity = 10, isPredicable = 1 in +def STrih_indexed : STInst<(outs), + (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3), + "memh($src1+#$src2) = $src3", + [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1, + s11_1ImmPred:$src2))]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP : STInst<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memh(#$global+$offset) = $src", + []>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STh_GP : STInst<(outs), + (ins globaladdress:$global, IntRegs:$src), + "memh(#$global) = $src", + []>; + +// memh(Rx++#s4:1)=Rt.H +// memh(Rx++#s4:1)=Rt +let hasCtrlDep = 1, isPredicable = 1 in +def POST_SThri : STInstPI<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), + "memh($src2++#$offset) = $src1", + [(set IntRegs:$dst, + (post_truncsti16 IntRegs:$src1, IntRegs:$src2, + s4_1ImmPred:$offset))], + "$src2 = $dst">; + +// Store halfword conditionally. +// if ([!]Pv) memh(Rs+#u6:1)=Rt +// if (Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memh($addr) = $src2", + []>; + +// if (!Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cNotPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memh($addr) = $src2", + []>; + +// if (Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if ($src1) memh($src2+#$src3) = $src4", + []>; + +// if (!Pv) memh(Rs+#u6:1)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cNotPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if (!$src1) memh($src2+#$src3) = $src4", + []>; + +// if ([!]Pv) memh(Rx++#s4:1)=Rt +// if (Pv) memh(Rx++#s4:1)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if ($src1) memh($src3++#$offset) = $src2", + [],"$src3 = $dst">; + +// if (!Pv) memh(Rx++#s4:1)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset), + "if (!$src1) memh($src3++#$offset) = $src2", + [],"$src3 = $dst">; + + +// Store word. +// Store predicate. 
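+//
+// (Commentary: STriw_pred below is a spill pseudo, not a real encoding; its
+// "Error; should not emit" mnemonic is a tripwire in case it ever survives to
+// the printer. A predicate spill is presumably expanded into a transfer
+// through an integer register followed by a plain word store, roughly:
+//   r10 = p0 ; memw(fi#N) = r10
+// which would also explain the Defs = [R10,R11] scratch clobbers. This is an
+// inference from the definition, not something stated by the patch.)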
+let Defs = [R10,R11] in +def STriw_pred : STInst<(outs), + (ins MEMri:$addr, PredRegs:$src1), + "Error; should not emit", + []>; + +// memw(Rs+#s11:2)=Rt +let isPredicable = 1 in +def STriw : STInst<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memw($addr) = $src1", + [(store IntRegs:$src1, ADDRriS11_2:$addr)]>; + +let AddedComplexity = 10, isPredicable = 1 in +def STriw_indexed : STInst<(outs), + (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), + "memw($src1+#$src2) = $src3", + [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>; + +def STriwt : STInst<(outs), + (ins MEMri:$addr, DoubleRegs:$src1), + "memw($addr) = $src1", + [(truncstorei32 DoubleRegs:$src1, ADDRriS11_2:$addr)]>; + +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP : STInst<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memw(#$global+$offset) = $src", + []>; + +let hasCtrlDep = 1, isPredicable = 1 in +def POST_STwri : STInstPI<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset), + "memw($src2++#$offset) = $src1", + [(set IntRegs:$dst, + (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))], + "$src2 = $dst">; + +// Store word conditionally. +// if ([!]Pv) memw(Rs+#u6:2)=Rt +// if (Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memw($addr) = $src2", + []>; + +// if (!Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cNotPt : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memw($addr) = $src2", + []>; + +// if (Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1) memw($src2+#$src3) = $src4", + []>; + +// if (!Pv) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cNotPt : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1) memw($src2+#$src3) = $src4", + []>; + +// if ([!]Pv) memw(Rx++#s4:2)=Rt +// if (Pv) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">; + +// if (!Pv) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">; + + + +// Allocate stack frame. +let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in { + def ALLOCFRAME : STInst<(outs), + (ins i32imm:$amt), + "allocframe(#$amt)", + []>; +} +//===----------------------------------------------------------------------===// +// ST - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/ALU + +//===----------------------------------------------------------------------===// +// Logical NOT. +def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), + "$dst = not($src1)", + [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>; + + +// Sign extend word to doubleword. 
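+//
+// (Sketch: besides selecting (i64 (sext i32)), SXTW below is reused by the
+// sext_inreg patterns later in this file, where the 64-bit result is rebuilt
+// from its low word, e.g.
+//   Rdd = sign_extend_inreg(Rss, i16)  =>  Rdd = sxtw(sxth(Rss.lo))
+// so one ALU64 op plus the 32-bit extenders covers all in-register widths.)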
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+           "$dst = sxtw($src1)",
+           [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PERM +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+// Predicate transfer.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+               "$dst = $src1 // Should almost never emit this",
+               []>;
+
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+               "$dst = $src1 // Should almost never emit!",
+               [(set PredRegs:$dst, (trunc IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+             "$dst = asr($src1, #$src2)",
+             [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+              "$dst = asr($src1, #$src2)",
+              [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+          "$dst = asl($src1, #$src2)",
+          [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+             "$dst = lsr($src1, #$src2)",
+             [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+              "$dst = lsr($src1, #$src2)",
+              [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+                                                     DoubleRegs:$src2,
+                                                     u6Imm:$src3),
+              "$dst += lsr($src2, #$src3)",
+              [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+                                          (srl DoubleRegs:$src2,
+                                               u6ImmPred:$src3)))],
+              "$src1 = $dst">;
+
+// Shift by register and accumulate.
+def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1, + IntRegs:$src2, + IntRegs:$src3), + "$dst += asr($src2, $src3)", + [], "$src1 = $dst">; + +// Shift by immediate and add. +def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + u3Imm:$src3), + "$dst = addasl($src1, $src2, #$src3)", + [(set IntRegs:$dst, (add IntRegs:$src1, + (shl IntRegs:$src2, + u3ImmPred:$src3)))]>; + +// Shift by register. +def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = asl($src1, $src2)", + [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>; + +def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = asr($src1, $src2)", + [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>; + + +def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = lsr($src1, $src2)", + [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>; + +def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + "$dst = lsl($src1, $src2)", + [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>; + +def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + IntRegs:$src2), + "$dst = asr($src1, $src2)", + [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>; + +def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + IntRegs:$src2), + "$dst = lsr($src1, $src2)", + [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>; + +//===----------------------------------------------------------------------===// +// STYPE/SHIFT - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VH + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VH - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// STYPE/VW + +//===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// STYPE/VW - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SYSTEM/USER + +//===----------------------------------------------------------------------===// +def SDHexagonBARRIER: SDTypeProfile<0, 0, []>; +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER, + [SDNPHasChain]>; + +let hasSideEffects = 1 in +def BARRIER : STInst<(outs), (ins), + "barrier", + [(HexagonBARRIER)]>; + +//===----------------------------------------------------------------------===// +// SYSTEM/SUPER - +//===----------------------------------------------------------------------===// + +// TFRI64 - assembly mapped. +let isReMaterializable = 1 in +def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1), + "$dst = #$src1", + [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>; + +// Pseudo instruction to encode a set of conditional transfers. +// This instruction is used instead of a mux and trades-off codesize +// for performance. 
We conduct this transformation optimistically in +// the hope that these instructions get promoted to dot-new transfers. +let AddedComplexity = 100 in +def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, + IntRegs:$src2, + IntRegs:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +let AddedComplexity = 100 in +def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst), + (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3), + "Error; should not emit", + [(set IntRegs:$dst, (select PredRegs:$src1, + s12ImmPred:$src2, + s12ImmPred:$src3))]>; + +// Generate frameindex addresses. +let isReMaterializable = 1 in +def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1), + "$dst = add($src1)", + [(set IntRegs:$dst, ADDRri:$src1)]>; + +// +// CR - Type. +// +let neverHasSideEffects = 1, Defs = [SA0, LC0] in { +def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2), + "loop0($offset, #$src2)", + []>; +} + +let neverHasSideEffects = 1, Defs = [SA0, LC0] in { +def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2), + "loop0($offset, $src2)", + []>; +} + +let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, + Defs = [PC, LC0], Uses = [SA0, LC0] in { +def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset), + ":endloop0", + []>; +} + +// Support for generating global address. +// Taken from X86InstrInfo.td. +def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; +def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; + +// This pattern is incorrect. When we add small data, we should change +// this pattern to use memw(#foo). +let isMoveImm = 1 in +def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, + (load (HexagonCONST32 tglobaltlsaddr:$global)))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, + (HexagonCONST32 tglobaladdr:$global))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt), + "$dst = CONST32(#$jt)", + [(set IntRegs:$dst, + (HexagonCONST32 tjumptable:$jt))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, + (HexagonCONST32_GP tglobaladdr:$global))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global), + "$dst = CONST32(#$global)", + [(set IntRegs:$dst, imm:$global) ]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label), + "$dst = CONST32($label)", + [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>; + +let isReMaterializable = 1, isMoveImm = 1 in +def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global), + "$dst = CONST64(#$global)", + [(set DoubleRegs:$dst, imm:$global) ]>; + +def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), + "$dst = xor($dst, $dst)", + [(set PredRegs:$dst, 0)]>; + +def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + "$dst = mpy($src1, $src2)", + [(set IntRegs:$dst, + (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)), + (i64 (sext IntRegs:$src2)))), + 
(i32 32)))))]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+                                      SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+                           [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
+// an optional glue, and variadic arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+                          [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+                        "Should never be emitted",
+                        [(callseq_start timm:$amt)]>;
+}
+
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+                      "Should never be emitted",
+                      [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+  Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+          R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+  def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+             "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+  Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+          R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+  def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+              "callr $dst",
+              []>;
+ }
+
+// Tail calls.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+  Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+          R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+  def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+                   "jump $dst // TAILCALL", []>;
+}
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+  Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+          R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+  def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+                     "jump $dst // TAILCALL", []>;
+}
+
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+  Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+          R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+  def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+                  "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+          (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+          (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+          (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+// Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+          (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+          (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet IntRegs:$dst),
+          (TCRETURNR IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and IntRegs:$src1, 65535),
+           (ZXTH IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and IntRegs:$src1, 255),
+           (ZXTB IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced; if it is, it must be mapped to a
+// no-op.
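+// (Why the pattern below matches (add p1, -1): an i1 "true" sign-extends to
+// the all-ones value -1, and in 1-bit arithmetic p + 1 == p + (-1) == !p, so
+// adding true to a predicate is exactly a predicate NOT.)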
+def : Pat <(add PredRegs:$src1, -1),
+           (NOT_pp PredRegs:$src1)>;
+
+// Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
+                   IntRegs:$src4),
+           (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
+                           IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
+           (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+           (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+           (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1,
+                  (add (HexagonCONST32_GP tglobaladdr:$global),
+                       u16ImmPred:$offset)),
+           (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+           (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+                                      u16ImmPred:$offset)),
+           (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+           (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+                          (add (HexagonCONST32_GP tglobaladdr:$global),
+                               u16ImmPred:$offset)),
+           (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+                          (HexagonCONST32_GP tglobaladdr:$global)),
+           (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+                         (add (HexagonCONST32_GP tglobaladdr:$global),
+                              u16ImmPred:$offset)),
+           (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+                         (HexagonCONST32_GP tglobaladdr:$global)),
+           (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
+                      u16ImmPred:$offset)),
+           (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
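+// (Note on the recurring "AddedComplexity = 100": instruction selection
+// prefers patterns with higher complexity when several match, so these
+// GP-relative forms win over the generic base+offset load/store patterns
+// whenever the address is provably globaladdress[+offset]. A brief sketch of
+// the mechanism, not something specific to this patch.)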
+let AddedComplexity = 100 in
+def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDw_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+                           u16ImmPred:$offset))),
+           (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+           (LDd_GP tglobaladdr:$global)>;
+
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+           (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+                             u16ImmPred:$offset)),
+           (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDrih_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+                             u16ImmPred:$offset)),
+           (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+                            u16ImmPred:$offset)),
+           (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+                            u16ImmPred:$offset)),
+           (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memub(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDriub_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+                            u16ImmPred:$offset)),
+           (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memub(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDub_GP tglobaladdr:$global)>;
+
+// When the Interprocedural Global Variable optimizer realizes that a
+// certain global variable takes only two constant values, it shrinks the
+// global to a boolean. Catch those loads here in the following 3 patterns.
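+// (For instance, an i1 global flag read as a zero-extended value would be
+// selected as a plain unsigned byte load:
+//   r0 = memub(#flag)
+// while the sign- and any-extending forms fall back to memb, per the patterns
+// below. "flag" is a made-up name for illustration only.)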
+let AddedComplexity = 100 in
+def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDub_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memuh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+           (LDuh_GP tglobaladdr:$global)>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+           (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)),
+           (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)),
+           (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)),
+           (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// We want to prevent emitting pnots as much as possible.
+// Map brcond with an unsupported setcc to a JMP_PredNot.
+def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+           (JMP_PredNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset),
+           (JMP_PredNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset),
+           (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset),
+           (JMP_Pred PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset),
+           (JMP_PredNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+           (JMP_Pred (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+                   bb:$offset),
+           (JMP_PredNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1),
+                        bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+           (JMP_PredNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+                   bb:$offset),
+           (JMP_PredNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2),
+                        bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes; so emulate with combines.
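+// For example, r1:0 = select(p0, r3:2, r5:4) becomes, in effect:
+//   r1 = mux(p0, r3, r5)
+//   r0 = mux(p0, r2, r4)
+//   r1:0 = combine(r1, r0)
+// i.e. two 32-bit muxes on the halves plus a combine (shown for illustration
+// only; the pattern below expresses the same thing with EXTRACT_SUBREG).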
+def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+           (COMBINE_rr
+              (MUX_rr PredRegs:$src1,
+                      (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg),
+                      (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)),
+              (MUX_rr PredRegs:$src1,
+                      (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+                      (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+           (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2),
+                  (AND_pp (NOT_pp PredRegs:$src1), PredRegs:$src3))>;
+
+// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
+def : Pat<(i1 (load ADDRriS11_2:$addr)),
+          (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+
+// Map for truncating from i64 to i32.
+def : Pat<(i32 (trunc DoubleRegs:$src)),
+          (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>;
+
+// Map for truncating from i64 to i1.
+def : Pat<(i1 (trunc DoubleRegs:$src)),
+          (i1 (TFR_PdRs (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>;
+
+// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
+def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr),
+          (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+                                                         subreg_loreg)))>;
+
+// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
+def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr),
+          (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+                                                         subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memb(addr) = i1 -> r0 = 1; memb(addr) = r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+          (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+let AddedComplexity = 100 in
+// Map from i1 = constant<-1>; memb(CONST32(#foo)) = i1 -> r0 = 1;
+// memb(#foo) = r0
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+          (STb_GP tglobaladdr:$global, (TFRI 1))>;
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #1, #0); store Rt.
+def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr),
+          (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
+// Better way to do this?
+def : Pat<(i64 (anyext IntRegs:$src1)),
+          (i64 (SXTW IntRegs:$src1))>;
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)),
+          (i1 (NOT_Ps (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)),
+          (i1 (NOT_Ps (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)),
+          (i1 (NOT_Ps (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)),
+           (i1 (NOT_Ps (i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>;
+
+// Map cmpne(Rs) -> !cmpeq(Rs).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)),
+           (i1 (NOT_Ps (i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>;
+
+// Convert setne back to xor for hexagon since we compute w/ pred registers.
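+// (On 1-bit values this is an identity: a != b and a xor b agree on all four
+// input combinations: 0,0 -> 0; 0,1 -> 1; 1,0 -> 1; 1,1 -> 0.)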
+def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)),
+           (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>;
+
+// Map cmpne(Rss) -> !cmpew(Rss).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)),
+           (i1 (NOT_Ps (i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)),
+           (i1 (NOT_Ps (i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>;
+
+def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)),
+           (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)),
+           (i1 (NOT_Ps (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)),
+           (i1 (NOT_Ps (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>;
+
+// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt).
+// rs < rt -> rs < rt. Let assembler map it.
+def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)),
+           (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)),
+           (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpltu(Rs, Rt) -> cmpgtu(Rt, Rs).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)),
+           (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map from cmpltu(Rss, Rtt) -> cmpgtu(Rtt, Rss).
+// rss < rtt -> rtt > rss.
+def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)),
+           (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)),
+           (i1 (NOT_Ps (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>;
+
+// Map from Rss >= Rtt -> !(Rtt > Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+           (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>;
+
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)),
+           (i1 (NOT_Ps (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rss <= Rtt) -> !(Rss > Rtt).
+def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+           (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext PredRegs:$src1)),
+           (i32 (MUX_ii PredRegs:$src1, -1, 0))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+          (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+          (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+          (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+          (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
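+// (The common shape in this section: widening to 64 bits pairs the 32-bit
+// value with a zero high word, e.g.
+//   r1:0 = combine(#0, r2)
+// while i1 sources first go through a mux to pick #1 or #0. A sketch of the
+// scheme, not additional patterns.)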
+// i1 -> i32 +def : Pat <(i32 (zext PredRegs:$src1)), + (i32 (MUX_ii PredRegs:$src1, 1, 0))>; + +// i1 -> i64 +def : Pat <(i64 (zext PredRegs:$src1)), + (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>; + +// i32 -> i64 +def : Pat <(i64 (zext IntRegs:$src1)), + (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>; + +// i8 -> i64 +def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>; + +// i16 -> i64 +def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>; + +// i32 -> i64 +def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + +def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), + (i32 (LDriw ADDRriS11_0:$src1))>; + +// Map from Rs = Pd to Pd = mux(Pd, #1, #0) +def : Pat <(i32 (zext PredRegs:$src1)), + (i32 (MUX_ii PredRegs:$src1, 1, 0))>; + +// Map from Rs = Pd to Pd = mux(Pd, #1, #0) +def : Pat <(i32 (anyext PredRegs:$src1)), + (i32 (MUX_ii PredRegs:$src1, 1, 0))>; + +// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0)) +def : Pat <(i64 (anyext PredRegs:$src1)), + (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>; + + +// Any extended 64-bit load. +// anyext i32 -> i64 +def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; + +// anyext i16 -> i64. +def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>; + +// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). +def : Pat<(i64 (zext IntRegs:$src1)), + (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>; + +// Multiply 64-bit unsigned and use upper result. +def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2), + (MPYU64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG + (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG (LSRd_ri(MPYU64 + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + 32) ,subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), + 32),subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg) + )>; + +// Multiply 64-bit signed and use upper result. +def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2), + (MPY64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG + (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0), + (EXTRACT_SUBREG (LSRd_ri(MPYU64 + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + 32) ,subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, + subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, + subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), + 32),subreg_loreg)), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg) + )>; + +// Hexagon specific ISD nodes. +def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>; +def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC", + SDTHexagonADJDYNALLOC>; +// Needed to tag these instructions for stack layout. 
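+// (Background note: usesCustomInserter = 1 routes the pseudo below through
+// the target's EmitInstrWithCustomInserter hook when the selected DAG is
+// lowered to MachineInstrs; this is the standard LLVM mechanism for pseudos
+// that need target-specific expansion, and not something introduced by this
+// patch.)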
+let usesCustomInserter = 1 in +def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1, + s16Imm:$src2), + "$dst = add($src1, #$src2)", + [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1, + s16ImmPred:$src2))]>; + +def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>; +def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>; +def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1), + "$dst = $src1", + [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>; + +let AddedComplexity = 100 in +def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)), + (TFR IntRegs:$src1)>; + + +def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; + +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT : JRInst<(outs), (ins IntRegs:$src), + "jumpr $src", + [(HexagonBR_JT IntRegs:$src)]>; +def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; + +def : Pat<(HexagonWrapperJT tjumptable:$dst), + (CONST32_set_jt tjumptable:$dst)>; + + +//===----------------------------------------------------------------------===// +// V3 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV3.td" + +//===----------------------------------------------------------------------===// +// V3 Instructions - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// V4 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV4.td" diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td new file mode 100644 index 0000000..a73897e --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td @@ -0,0 +1,134 @@ +//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// J + +//===----------------------------------------------------------------------===// +// Call subroutine. +let isCall = 1, neverHasSideEffects = 1, + Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, + P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { + def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops), + "call $dst", []>, Requires<[HasV3T]>; +} + +//===----------------------------------------------------------------------===// +// J - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// JR + +//===----------------------------------------------------------------------===// +// Call subroutine from register. 
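+// (Worth noting as a sketch: the V3 call clobber lists above and below drop
+// D8-D10 and R22/R23 relative to the V2 CALL/CALLR definitions earlier in
+// HexagonInstrInfo.td, presumably reflecting a smaller caller-saved set in
+// the newer ABI; this is an inference from the two Defs lists, not a claim
+// from the patch itself.)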
+let isCall = 1, neverHasSideEffects = 1, + Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31, + P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { + def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops), + "callr $dst", + []>, Requires<[HasV3TOnly]>; + } + + +// if(p?.new) jumpr:t r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) jumpr:t $src2", + []>, Requires<[HasV3T]>; +} + +// if (!p?.new) jumpr:t r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cNotPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) jumpr:t $src2", + []>, Requires<[HasV3T]>; +} + +// Not taken. +// if(p?.new) jumpr:nt r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if ($src1.new) jumpr:nt $src2", + []>, Requires<[HasV3T]>; +} + +// if (!p?.new) jumpr:nt r? +let isReturn = 1, isTerminator = 1, isBarrier = 1, + Defs = [PC], Uses = [R31] in { + def JMPR_cNotPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2), + "if (!$src1.new) jumpr:nt $src2", + []>, Requires<[HasV3T]>; +} + +//===----------------------------------------------------------------------===// +// JR - +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ALU64/ALU + +//===----------------------------------------------------------------------===// + +let AddedComplexity = 200 in +def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = max($src2, $src1)", + [(set DoubleRegs:$dst, (select (i1 (setlt DoubleRegs:$src2, + DoubleRegs:$src1)), + DoubleRegs:$src1, + DoubleRegs:$src2))]>, +Requires<[HasV3T]>; + +let AddedComplexity = 200 in +def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, + DoubleRegs:$src2), + "$dst = min($src2, $src1)", + [(set DoubleRegs:$dst, (select (i1 (setgt DoubleRegs:$src2, + DoubleRegs:$src1)), + DoubleRegs:$src1, + DoubleRegs:$src2))]>, +Requires<[HasV3T]>; + +//===----------------------------------------------------------------------===// +// ALU64/ALU - +//===----------------------------------------------------------------------===// + + + + +//def : Pat <(brcond (i1 (seteq IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegEzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setne IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegNzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setle IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegLezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setge IntRegs:$src1, 0)), bb:$offset), +// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + +//def : Pat <(brcond (i1 (setgt IntRegs:$src1, -1)), bb:$offset), +// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>; + + +// Map call instruction +def : Pat<(call IntRegs:$dst), + (CALLRv3 IntRegs:$dst)>, Requires<[HasV3T]>; +def : Pat<(call tglobaladdr:$dst), + (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>; +def : Pat<(call texternalsym:$dst), + (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>; diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td new file mode 100644 index 0000000..24218d0 --- /dev/null +++ 
b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -0,0 +1,3392 @@ +//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V4 instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +// Hexagon V4 Architecture spec defines 8 instruction classes: +// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the +// compiler) + +// LD Instructions: +// ======================================== +// Loads (8/16/32/64 bit) +// Deallocframe + +// ST Instructions: +// ======================================== +// Stores (8/16/32/64 bit) +// Allocframe + +// ALU32 Instructions: +// ======================================== +// Arithmetic / Logical (32 bit) +// Vector Halfword + +// XTYPE Instructions (32/64 bit): +// ======================================== +// Arithmetic, Logical, Bit Manipulation +// Multiply (Integer, Fractional, Complex) +// Permute / Vector Permute Operations +// Predicate Operations +// Shift / Shift with Add/Sub/Logical +// Vector Byte ALU +// Vector Halfword (ALU, Shift, Multiply) +// Vector Word (ALU, Shift) + +// J Instructions: +// ======================================== +// Jump/Call PC-relative + +// JR Instructions: +// ======================================== +// Jump/Call Register + +// MEMOP Instructions: +// ======================================== +// Operation on memory (8/16/32 bit) + +// NV Instructions: +// ======================================== +// New-value Jumps +// New-value Stores + +// CR Instructions: +// ======================================== +// Control-Register Transfers +// Hardware Loop Setup +// Predicate Logicals & Reductions + +// SYSTEM Instructions (not implemented in the compiler): +// ======================================== +// Prefetch +// Cache Maintenance +// Bus Operations + + +//===----------------------------------------------------------------------===// +// ALU32 + +//===----------------------------------------------------------------------===// + +// Shift halfword. 
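+//
+// (Naming sketch for the predicated V4 forms that follow: the _cPt/_cNotPt
+// suffixes are the true/false predicate senses, and _cdnPt/_cdnNotPt are
+// their dot-new variants, which test a predicate produced earlier in the
+// same packet, e.g.
+//   { p0 = cmp.gt(r1, #0)
+//     if (p0.new) r2 = aslh(r3) }
+// Packet syntax shown for illustration only.)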
+
+def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                           (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1) $dst = aslh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                              (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1) $dst = aslh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                             (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1.new) $dst = aslh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                                (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1.new) $dst = aslh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                           (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1) $dst = asrh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                              (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1) $dst = asrh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                             (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1.new) $dst = asrh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                                (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1.new) $dst = asrh($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+// Sign extend.
+
+def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                           (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1) $dst = sxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                              (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1) $dst = sxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                             (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1.new) $dst = sxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                                (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1.new) $dst = sxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+
+def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                           (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1) $dst = sxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                              (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1) $dst = sxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                             (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1.new) $dst = sxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+                                (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1.new) $dst = sxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+// Zero extend.
+
+let neverHasSideEffects = 1 in
+def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1) $dst = zxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1) $dst = zxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1.new) $dst = zxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1.new) $dst = zxtb($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1) $dst = zxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1) $dst = zxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if ($src1.new) $dst = zxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2),
+            "if (!$src1.new) $dst = zxth($src2)",
+            []>,
+            Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in a post-increment load, the first operand is always the
+/// post-increment operand.
+///
+//// Load doubleword.
+// Rdd=memd(Re=#U6)
+
+// Rdd=memd(Rs+Rt<<#u2)
+// Special-case pattern for an indexed load without a shift, which is easier
+// to match. Its AddedComplexity is kept lower than that of the base+offset
+// load, and lower still than that of the more generic offset/shift version
+// below. The same approach is taken for all other base+index loads.
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2),
+            "$dst=memd($src1+$src2<<#0)",
+            [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+                                              IntRegs:$src2)))]>,
+            Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+            "$dst=memd($src1+$src2<<#$offset)",
+            [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+                                              (shl IntRegs:$src2,
+                                                   u2ImmPred:$offset))))]>,
+            Requires<[HasV4T]>;
+
+//// Load doubleword conditionally.
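+// The conditional loads below carry empty selection patterns: they are not
+// matched directly during instruction selection, but are expected to be
+// introduced afterwards (for example by if-conversion). A dot-new form
+// consumes a predicate written earlier in the same packet, as in this
+// illustrative packet:
+//   { p0 = cmp.eq(r1, #0)
+//     if (p0.new) r3:2 = memd(r4+r5<<#0) }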
+// if ([!]Pv[.new]) Rdd=memd(Rs+Rt<<#u2)
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if ($src1) $dst=memd($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if ($src1.new) $dst=memd($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if (!$src1) $dst=memd($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if ($src1) $dst=memd($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// Rdd=memd(Rt<<#u2+#U6)
+
+//// Load byte.
+// Rd=memb(Re=#U6) + +// Rd=memb(Rs+Rt<<#u2) +let AddedComplexity = 10, isPredicable = 1 in +def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memb($src1+$src2<<#0)", + [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memub($src1+$src2<<#0)", + [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memub($src1+$src2<<#0)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memb($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (sextloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memub($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (zextloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memub($src1+$src2<<#$offset)", + [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +//// Load byte conditionally. 
+// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2) +// if (Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memb($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memb(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +//// Load unsigned byte conditionally. 
+// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2) +// if (Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memub($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memub(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// Rd=memb(Rt<<#u2+#U6) + +//// Load halfword +// Rd=memh(Re=#U6) + +// Rd=memh(Rs+Rt<<#u2) +let AddedComplexity = 10, isPredicable = 1 in +def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memh($src1+$src2<<#0)", + [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memuh($src1+$src2<<#0)", + [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 10, isPredicable = 1 in +def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memuh($src1+$src2<<#0)", + [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +// Rd=memh(Rs+Rt<<#u2) +let AddedComplexity = 40, isPredicable = 1 in +def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memh($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (sextloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + 
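+// For reference, the shifted-index halfword pattern above corresponds to IR
+// of the following shape (hypothetical example):
+//   %p = getelementptr i16* %base, i32 %i
+//   %v = load i16* %p
+//   %e = sext i16 %v to i32
+// which selects to "r0 = memh(r1+r2<<#1)".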
+let AddedComplexity = 40, isPredicable = 1 in +def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memuh($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (zextloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +let AddedComplexity = 40, isPredicable = 1 in +def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memuh($src1+$src2<<#$offset)", + [(set IntRegs:$dst, + (extloadi16 (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +//// Load halfword conditionally. +// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2) +// if (Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +//// Load unsigned halfword conditionally. 
+// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2) +// if (Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if ($src1.new) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 15 in +def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "if (!$src1.new) $dst=memuh($src2+$src3<<#0)", + []>, + Requires<[HasV4T]>; + +// if (Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2) +let mayLoad = 1, AddedComplexity = 45 in +def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, + u2Imm:$offset), + "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)", + []>, + Requires<[HasV4T]>; + +// Rd=memh(Rt<<#u2+#U6) + +//// Load word. +// Rd=memw(Re=#U6) + +// Rd=memw(Rs+Rt<<#u2) +let AddedComplexity = 10, isPredicable = 1 in +def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst=memw($src1+$src2<<#0)", + [(set IntRegs:$dst, (load (add IntRegs:$src1, + IntRegs:$src2)))]>, + Requires<[HasV4T]>; + +// Rd=memw(Rs+Rt<<#u2) +let AddedComplexity = 40, isPredicable = 1 in +def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset), + "$dst=memw($src1+$src2<<#$offset)", + [(set IntRegs:$dst, (load (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$offset))))]>, + Requires<[HasV4T]>; + +//// Load word conditionally. 
+// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if ($src1) $dst=memw($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if ($src1.new) $dst=memw($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if (!$src1) $dst=memw($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+            "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if ($src1) $dst=memw($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+                 u2Imm:$offset),
+            "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
+            []>,
+            Requires<[HasV4T]>;
+
+// Rd=memw(Rt<<#u2+#U6)
+
+
+// Post-inc Load, Predicated, Dot new
+
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+            "if ($src1.new) $dst1 = memd($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+            "if (!$src1.new) $dst1 = memd($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+            "if ($src1.new) $dst1 = memb($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+            "if (!$src1.new) $dst1 = memb($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+            "if ($src1.new) $dst1 = memh($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+            "if (!$src1.new) $dst1 = memh($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+            "if ($src1.new) $dst1 = memub($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+            "if (!$src1.new) $dst1 = memub($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+            "if ($src1.new) $dst1 = memuh($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+            "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+            "if ($src1.new) $dst1 = memw($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+            (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+            "if (!$src1.new) $dst1 = memw($src2++#$src3)",
+            [],
+            "$src2 = $dst2">,
+            Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions (do not ignore):
+/// 1. In a post-increment store, the zeroth operand is always the
+///    post-increment operand.
+/// 2. The store value operand (Rt/Rtt) in a store is always the last operand.
+///
+
+// Store doubleword.
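+// Per assumption 2 above, the value being stored is the last input operand:
+// for instance, STrid_indexed_shl_V4 below orders its operands as
+// (base $src1, index $src2, shift $src3, value $src4).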
+
+// memd(Re=#U6)=Rtt
+// TODO: needs to be implemented
+
+// memd(Rs+#s11:3)=Rtt
+// memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed_shl_V4 : STInst<(outs),
+            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
+            "memd($src1+$src2<<#$src3) = $src4",
+            [(store DoubleRegs:$src4, (add IntRegs:$src1,
+                                      (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+            Requires<[HasV4T]>;
+
+// memd(Ru<<#u2+#U6)=Rtt
+let AddedComplexity = 10 in
+def STrid_shl_V4 : STInst<(outs),
+            (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+            "memd($src1<<#$src2+#$src3) = $src4",
+            [(store DoubleRegs:$src4, (shl IntRegs:$src1,
+                                      (add u2ImmPred:$src2,
+                                           u6ImmPred:$src3)))]>,
+            Requires<[HasV4T]>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+            "if ($src1.new) memd($addr) = $src2",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+            "if (!$src1.new) memd($addr) = $src2",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+                 DoubleRegs:$src4),
+            "if ($src1.new) memd($src2+#$src3) = $src4",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+                 DoubleRegs:$src4),
+            "if (!$src1.new) memd($src2+#$src3) = $src4",
+            []>,
+            Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
+// if (Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 DoubleRegs:$src5),
+            "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 DoubleRegs:$src5),
+            "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 DoubleRegs:$src5),
+            "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 DoubleRegs:$src5),
+ "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt +// if (Pv) memd(Rx++#s4:3)=Rtt +// if (Pv.new) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if ($src1.new) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memd(Rx++#s4:3)=Rtt +// if (!Pv.new) memd(Rx++#s4:3)=Rtt +let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in +def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3, + s4_3Imm:$offset), + "if (!$src1.new) memd($src3++#$offset) = $src2", + [], + "$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store byte. +// memb(Re=#U6)=Rt +// TODO: needs to be implemented. +// memb(Rs+#s11:0)=Rt +// memb(Rs+#u6:0)=#S8 +let AddedComplexity = 10, isPredicable = 1 in +def STrib_imm_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3), + "memb($src1+#$src2) = #$src3", + [(truncstorei8 s8ImmPred:$src3, (add IntRegs:$src1, + u6_0ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// memb(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, isPredicable = 1 in +def STrib_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memb($src1+$src2<<#$src3) = $src4", + [(truncstorei8 IntRegs:$src4, (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memb(Ru<<#u2+#U6)=Rt +let AddedComplexity = 10 in +def STrib_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memb($src1<<#$src2+#$src3) = $src4", + [(truncstorei8 IntRegs:$src4, (shl IntRegs:$src1, + (add u2ImmPred:$src2, + u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0:circ(Mu))=Rt +// memb(Rx++I:circ(Mu))=Rt +// memb(Rx++Mu)=Rt +// memb(Rx++Mu:brev)=Rt +// memb(gp+#u16:0)=Rt + + +// Store byte conditionally. 
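+// Both the unconditional and the conditional byte stores have
+// store-immediate forms (STrib_imm_V4 above, the STrib_imm_c*_V4 defs below)
+// that store a small constant without first materializing it in a register.
+// For example, C code such as "p->flag = 1" (a byte field at a hypothetical
+// offset of 4) can become:
+//   memb(r0+#4) = #1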
+// if ([!]Pv[.new]) memb(#u6)=Rt +// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6 +// if (Pv) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if ($src1) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if ($src1.new) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if (!$src1) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_imm_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4), + "if (!$src1.new) memb($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt +// if (Pv) memb(Rs+#u6:0)=Rt +// if (Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memb($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=Rt +// if (!Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memb($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memb(Rs+#u6:0)=Rt +// if (!Pv) memb(Rs+#u6:0)=Rt +// if (Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1.new) memb($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memb($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt +// if (Pv) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// 
if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt +// if (Pv) memb(Rx++#s4:0)=Rt +// if (Pv.new) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1.new) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rx++#s4:0)=Rt +// if (!Pv.new) memb(Rx++#s4:0)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1.new) memb($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store halfword. +// memh(Re=#U6)=Rt.H +// TODO: needs to be implemented + +// memh(Re=#U6)=Rt +// TODO: needs to be implemented + +// memh(Rs+#s11:1)=Rt.H +// memh(Rs+#s11:1)=Rt +// memh(Rs+#u6:1)=#S8 +let AddedComplexity = 10, isPredicable = 1 in +def STrih_imm_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3), + "memh($src1+#$src2) = #$src3", + [(truncstorei16 s8ImmPred:$src3, (add IntRegs:$src1, + u6_1ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// memh(Rs+Ru<<#u2)=Rt.H +// TODO: needs to be implemented. + +// memh(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, isPredicable = 1 in +def STrih_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memh($src1+$src2<<#$src3) = $src4", + [(truncstorei16 IntRegs:$src4, (add IntRegs:$src1, + (shl IntRegs:$src2, + u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memh(Ru<<#u2+#U6)=Rt.H +// memh(Ru<<#u2+#U6)=Rt +let AddedComplexity = 10 in +def STrih_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memh($src1<<#$src2+#$src3) = $src4", + [(truncstorei16 IntRegs:$src4, (shl IntRegs:$src1, + (add u2ImmPred:$src2, + u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memh(Rx++#s4:1:circ(Mu))=Rt.H +// memh(Rx++#s4:1:circ(Mu))=Rt +// memh(Rx++I:circ(Mu))=Rt.H +// memh(Rx++I:circ(Mu))=Rt +// memh(Rx++Mu)=Rt.H +// memh(Rx++Mu)=Rt +// memh(Rx++Mu:brev)=Rt.H +// memh(Rx++Mu:brev)=Rt +// memh(gp+#u16:1)=Rt.H +// memh(gp+#u16:1)=Rt + + +// Store halfword conditionally. +// if ([!]Pv[.new]) memh(#u6)=Rt.H +// if ([!]Pv[.new]) memh(#u6)=Rt + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6 +// if (Pv) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if ($src1) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if ($src1.new) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if (!$src1) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+#u6:1)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_imm_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4), + "if (!$src1.new) memh($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H +// TODO: needs to be implemented. 
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+            "if ($src1.new) memh($addr) = $src2",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+            "if (!$src1.new) memh($addr) = $src2",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+            "if ($src1.new) memh($src2+#$src3) = $src4",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+            "if (!$src1.new) memh($src2+#$src3) = $src4",
+            []>,
+            Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
+// if (Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if ($src1) memh($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+// if (Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+            "if ($src1.new) memh($src3++#$offset) = $src2",
+            [], "$src3 = $dst">,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+// if (!Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+            "if (!$src1.new) memh($src3++#$offset) = $src2",
+            [], "$src3 = $dst">,
+            Requires<[HasV4T]>;
+
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+ +// memw(Rs+#s11:2)=Rt +// memw(Rs+#u6:2)=#S8 +let AddedComplexity = 10, isPredicable = 1 in +def STriw_imm_V4 : STInst<(outs), + (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3), + "memw($src1+#$src2) = #$src3", + [(store s8ImmPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// memw(Rs+Ru<<#u2)=Rt +let AddedComplexity = 10, isPredicable = 1 in +def STriw_indexed_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memw($src1+$src2<<#$src3) = $src4", + [(store IntRegs:$src4, (add IntRegs:$src1, + (shl IntRegs:$src2, u2ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memw(Ru<<#u2+#U6)=Rt +let AddedComplexity = 10 in +def STriw_shl_V4 : STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memw($src1<<#$src2+#$src3) = $src4", + [(store IntRegs:$src4, (shl IntRegs:$src1, + (add u2ImmPred:$src2, u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// memw(Rx++#s4:2)=Rt +// memw(Rx++#s4:2:circ(Mu))=Rt +// memw(Rx++I:circ(Mu))=Rt +// memw(Rx++Mu)=Rt +// memw(Rx++Mu:brev)=Rt +// memw(gp+#u16:2)=Rt + + +// Store word conditionally. +// if ([!]Pv[.new]) memw(#u6)=Rt +// TODO: Needs to be implemented. + +// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6 +// if (Pv) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if ($src1) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if ($src1.new) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if (!$src1) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=#S6 +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_imm_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4), + "if (!$src1.new) memw($src2+#$src3) = #$src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt +// if (Pv) memw(Rs+#u6:2)=Rt +// if (Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memw($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=Rt +// if (!Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memw($addr) = $src2", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(Rs+#u6:2)=Rt +// if (!Pv) memw(Rs+#u6:2)=Rt +// if (Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1.new) memw($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=Rt +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memw($src2+#$src3) = $src4", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt +// if (Pv) 
memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt +// if (Pv) memw(Rx++#s4:2)=Rt +// if (Pv.new) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1.new) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rx++#s4:2)=Rt +// if (!Pv.new) memw(Rx++#s4:2)=Rt +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1.new) memw($src3++#$offset) = $src2", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------=== +// ST - +//===----------------------------------------------------------------------=== + + +//===----------------------------------------------------------------------===// +// NV/ST + +//===----------------------------------------------------------------------===// + +// Store new-value byte. 
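+// A new-value (.new) store takes its data from an instruction that executes
+// in the same packet, so the store need not wait for the producer's register
+// writeback. Illustrative packet:
+//   { r2 = add(r3, #1)
+//     memb(r5+#0) = r2.new }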
+ +// memb(Re=#U6)=Nt.new +// memb(Rs+#s11:0)=Nt.new +let mayStore = 1, isPredicable = 1 in +def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1), + "memb($addr) = $src1.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, isPredicable = 1 in +def STrib_indexed_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3), + "memb($src1+#$src2) = $src3.new", + []>, + Requires<[HasV4T]>; + +// memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in +def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memb($src1+$src2<<#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memb(Ru<<#u2+#U6)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memb($src1<<#$src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in +def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset), + "memb($src2++#$offset) = $src1.new", + [], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// memb(Rx++#s4:0:circ(Mu))=Nt.new +// memb(Rx++I:circ(Mu))=Nt.new +// memb(Rx++Mu)=Nt.new +// memb(Rx++Mu:brev)=Nt.new + +// memb(gp+#u16:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memb(#$global+$offset) = $src.new", + []>, + Requires<[HasV4T]>; + + +// Store new-value byte conditionally. +// if ([!]Pv[.new]) memb(#u6)=Nt.new +// if (Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memb($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if ($src1.new) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) 
memb(Rs+#u6:0)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memb($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + + +// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new +// if (Pv) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new +// if (Pv) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (Pv.new) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if ($src1.new) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv.new) memb(Rx++#s4:0)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset), + "if (!$src1.new) memb($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +// Store new-value halfword. 
+// memh(Re=#U6)=Nt.new +// memh(Rs+#s11:1)=Nt.new +let mayStore = 1, isPredicable = 1 in +def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1), + "memh($addr) = $src1.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, isPredicable = 1 in +def STrih_indexed_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3), + "memh($src1+#$src2) = $src3.new", + []>, + Requires<[HasV4T]>; + +// memh(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in +def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memh($src1+$src2<<#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memh(Ru<<#u2+#U6)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STrih_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memh($src1<<#$src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memh(Rx++#s4:1)=Nt.new +let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in +def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset), + "memh($src2++#$offset) = $src1.new", + [], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// memh(Rx++#s4:1:circ(Mu))=Nt.new +// memh(Rx++I:circ(Mu))=Nt.new +// memh(Rx++Mu)=Nt.new +// memh(Rx++Mu:brev)=Nt.new + +// memh(gp+#u16:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memh(#$global+$offset) = $src.new", + []>, + Requires<[HasV4T]>; + + +// Store new-value halfword conditionally. + +// if ([!]Pv[.new]) memh(#u6)=Nt.new + +// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new +// if (Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memh($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if ($src1) memh($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if ($src1.new) memh($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memh(Rs+#u6:1)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4), + "if (!$src1) memh($src2+#$src3) = $src4.new", 
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+            "if (!$src1.new) memh($src2+#$src3) = $src4.new",
+            []>,
+            Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
+            []>,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 IntRegs:$src5),
+            "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+            []>,
+            Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new
+// if (Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+            "if ($src1) memh($src3++#$offset) = $src2.new",
+            [],"$src3 = $dst">,
+            Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+            "if ($src1.new) memh($src3++#$offset) = $src2.new",
+            [],"$src3 = $dst">,
+            Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+            "if (!$src1) memh($src3++#$offset) = $src2.new",
+            [],"$src3 = $dst">,
+            Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+            "if (!$src1.new) memh($src3++#$offset) = $src2.new",
+            [],"$src3 = $dst">,
+            Requires<[HasV4T]>;
+
+
+// Store new-value word.
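+// As with the byte and halfword forms above, the register being stored is
+// the output of another instruction in the same packet (the ".new" value),
+// which is why these defs carry no selection patterns; presumably they are
+// materialized after instruction selection, when packets are formed.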
+ +// memw(Re=#U6)=Nt.new +// memw(Rs+#s11:2)=Nt.new +let mayStore = 1, isPredicable = 1 in +def STriw_nv_V4 : NVInst_V4<(outs), + (ins MEMri:$addr, IntRegs:$src1), + "memw($addr) = $src1.new", + []>, + Requires<[HasV4T]>; + +let mayStore = 1, isPredicable = 1 in +def STriw_indexed_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3), + "memw($src1+#$src2) = $src3.new", + []>, + Requires<[HasV4T]>; + +// memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in +def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4), + "memw($src1+$src2<<#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memw(Ru<<#u2+#U6)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_shl_nv_V4 : NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4), + "memw($src1<<#$src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in +def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset), + "memw($src2++#$offset) = $src1.new", + [], + "$src2 = $dst">, + Requires<[HasV4T]>; + +// memw(Rx++#s4:2:circ(Mu))=Nt.new +// memw(Rx++I:circ(Mu))=Nt.new +// memw(Rx++Mu)=Nt.new +// memw(Rx++Mu:brev)=Nt.new +// memw(gp+#u16:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_GP_nv_V4 : NVInst_V4<(outs), + (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), + "memw(#$global+$offset) = $src.new", + []>, + Requires<[HasV4T]>; + + +// Store new-value word conditionally. + +// if ([!]Pv[.new]) memw(#u6)=Nt.new + +// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new +// if (Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if ($src1.new) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2), + "if (!$src1.new) memw($addr) = $src2.new", + []>, + Requires<[HasV4T]>; + +// if (Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1) memw($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if ($src1.new) memw($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1) memw($src2+#$src3) = $src4.new", + 
[]>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+#u6:2)=Nt.new +let mayStore = 1, neverHasSideEffects = 1 in +def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4), + "if (!$src1.new) memw($src2+#$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + + +// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new +// if (Pv) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new +let mayStore = 1, AddedComplexity = 10 in +def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4, + IntRegs:$src5), + "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new", + []>, + Requires<[HasV4T]>; + +// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new +// if (Pv) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (Pv.new) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if ($src1.new) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + +// if (!Pv.new) memw(Rx++#s4:2)=Nt.new +let mayStore = 1, hasCtrlDep = 1 in +def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset), + "if (!$src1.new) memw($src3++#$offset) = $src2.new", + [],"$src3 = $dst">, + Requires<[HasV4T]>; + + +//===----------------------------------------------------------------------===// +// NV/ST - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// XTYPE/ALU + +//===----------------------------------------------------------------------===// + +// Add and accumulate. 
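+// These compound forms fold a second add or subtract into one instruction.
+// Note that add(Rs, sub(#s6, Ru)) == sub(add(Rs, #s6), Ru), which is why two
+// different DAG patterns below can select the same add/sub instruction.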
+// Rd=add(Rs,add(Ru,#s6))
+def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+            "$dst = add($src1, add($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (add IntRegs:$src1, (add IntRegs:$src2, s6ImmPred:$src3)))]>,
+            Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+            "$dst = add($src1, sub(#$src2, $src3))",
+            [(set IntRegs:$dst,
+             (add IntRegs:$src1, (sub s6ImmPred:$src2, IntRegs:$src3)))]>,
+            Requires<[HasV4T]>;
+
+// Generates the same instruction as ADDr_SUBri_V4 but matches a different
+// pattern.
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+            "$dst = add($src1, sub(#$src2, $src3))",
+            [(set IntRegs:$dst,
+             (sub (add IntRegs:$src1, s6ImmPred:$src2), IntRegs:$src3))]>,
+            Requires<[HasV4T]>;
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+
+// Logical doublewords.
+// Rdd=and(Rtt,~Rss)
+def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+            (ins DoubleRegs:$src1, DoubleRegs:$src2),
+            "$dst = and($src1, ~$src2)",
+            [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+                                    (not DoubleRegs:$src2)))]>,
+            Requires<[HasV4T]>;
+
+// Rdd=or(Rtt,~Rss)
+def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+            (ins DoubleRegs:$src1, DoubleRegs:$src2),
+            "$dst = or($src1, ~$src2)",
+            [(set DoubleRegs:$dst,
+             (or DoubleRegs:$src1, (not DoubleRegs:$src2)))]>,
+            Requires<[HasV4T]>;
+
+
+// Logical-logical doublewords.
+// Rxx^=xor(Rss,Rtt)
+def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
+            (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+            "$dst ^= xor($src2, $src3)",
+            [(set DoubleRegs:$dst,
+             (xor DoubleRegs:$src1, (xor DoubleRegs:$src2, DoubleRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+
+// Logical-logical words.
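+// In the MInst_acc defs below, the accumulator input is tied to the
+// destination register (the "$src1 = $dst" / "$src2 = $dst" constraint),
+// modelling the read-modify-write Rx operand.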
+// Rx=or(Ru,and(Rx,#s10))
+def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+            "$dst = or($src1, and($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= and($src2, $src3)",
+            [(set IntRegs:$dst,
+             (and IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= and($src2, $src3)",
+            [(set IntRegs:$dst,
+             (or IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=and(Rs,Rt)
+def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= and($src2, $src3)",
+            [(set IntRegs:$dst,
+             (xor IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= and($src2, ~$src3)",
+            [(set IntRegs:$dst,
+             (and IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= and($src2, ~$src3)",
+            [(set IntRegs:$dst,
+             (or IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= and($src2, ~$src3)",
+            [(set IntRegs:$dst,
+             (xor IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= or($src2, $src3)",
+            [(set IntRegs:$dst,
+             (and IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= or($src2, $src3)",
+            [(set IntRegs:$dst,
+             (or IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=or(Rs,Rt)
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= or($src2, $src3)",
+            [(set IntRegs:$dst,
+             (xor IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst &= xor($src2, $src3)",
+            [(set IntRegs:$dst,
+             (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst |= xor($src2, $src3)",
+            [(set IntRegs:$dst,
+             (or IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+            "$dst ^= xor($src2, $src3)",
+            [(set IntRegs:$dst,
+             (xor IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+            "$dst |= and($src2, #$src3)",
+            [(set IntRegs:$dst,
+             (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx|=or(Rs,#s10)
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+            "$dst |= or($src2, #$src3)",
+            [(set IntRegs:$dst,
+             (or IntRegs:$src1, (or IntRegs:$src2, s10ImmPred:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use the lower result.
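+// These match DAGs such as (add (mul Rs, Rt), #u6), folding the low 32 bits
+// of a multiply and the surrounding add into a single instruction.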
+// Rd=add(#u6,mpyi(Rs,#U6)) +def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst), + (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3), + "$dst = add(#$src1, mpyi($src2, #$src3))", + [(set IntRegs:$dst, + (add (mul IntRegs:$src2, u6ImmPred:$src3), u6ImmPred:$src1))]>, + Requires<[HasV4T]>; + +// Rd=add(#u6,mpyi(Rs,Rt)) + +def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst), + (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst = add(#$src1, mpyi($src2, $src3))", + [(set IntRegs:$dst, + (add (mul IntRegs:$src2, IntRegs:$src3), u6ImmPred:$src1))]>, + Requires<[HasV4T]>; + +// Rd=add(Ru,mpyi(#u6:2,Rs)) +def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3), + "$dst = add($src1, mpyi(#$src2, $src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (mul IntRegs:$src3, u6_2ImmPred:$src2)))]>, + Requires<[HasV4T]>; + +// Rd=add(Ru,mpyi(Rs,#u6)) +def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3), + "$dst = add($src1, mpyi($src2, #$src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (mul IntRegs:$src2, u6ImmPred:$src3)))]>, + Requires<[HasV4T]>; + +// Rx=add(Ru,mpyi(Rx,Rs)) +def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3), + "$dst = add($src1, mpyi($src2, $src3))", + [(set IntRegs:$dst, + (add IntRegs:$src1, (mul IntRegs:$src2, IntRegs:$src3)))], + "$src2 = $dst">, + Requires<[HasV4T]>; + + +// Polynomial multiply words +// Rdd=pmpyw(Rs,Rt) +// Rxx^=pmpyw(Rs,Rt) + +// Vector reduce multiply word by signed half (32x16) +// Rdd=vrmpyweh(Rss,Rtt)[:<<1] +// Rdd=vrmpywoh(Rss,Rtt)[:<<1] +// Rxx+=vrmpyweh(Rss,Rtt)[:<<1] +// Rxx+=vrmpywoh(Rss,Rtt)[:<<1] + +// Multiply and use upper result +// Rd=mpy(Rs,Rt.H):<<1:sat +// Rd=mpy(Rs,Rt.L):<<1:sat +// Rd=mpy(Rs,Rt):<<1 +// Rd=mpy(Rs,Rt):<<1:sat +// Rd=mpysu(Rs,Rt) +// Rx+=mpy(Rs,Rt):<<1:sat +// Rx-=mpy(Rs,Rt):<<1:sat + +// Vector multiply bytes +// Rdd=vmpybsu(Rs,Rt) +// Rdd=vmpybu(Rs,Rt) +// Rxx+=vmpybsu(Rs,Rt) +// Rxx+=vmpybu(Rs,Rt) + +// Vector polynomial multiply halfwords +// Rdd=vpmpyh(Rs,Rt) +// Rxx^=vpmpyh(Rs,Rt) + +//===----------------------------------------------------------------------===// +// XTYPE/MPY - +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// XTYPE/SHIFT + +//===----------------------------------------------------------------------===// + +// Shift by immediate and accumulate. 
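+// In the defs below the shifted register is tied to the destination
+// ("$src2 = $dst"), so e.g. Rx=add(#u8,asl(Rx,#U5)) updates Rx in place
+// with #u8 + (Rx << #U5).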
+// Rx=add(#u8,asl(Rx,#U5))
+def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = add(#$src1, asl($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (add (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx=add(#u8,lsr(Rx,#U5))
+def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = add(#$src1, lsr($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (add (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx=sub(#u8,asl(Rx,#U5))
+def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = sub(#$src1, asl($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (sub u8ImmPred:$src1, (shl IntRegs:$src2, u5ImmPred:$src3)))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+// Rx=sub(#u8,lsr(Rx,#U5))
+def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = sub(#$src1, lsr($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (sub u8ImmPred:$src1, (srl IntRegs:$src2, u5ImmPred:$src3)))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+
+//Shift by immediate and logical.
+//Rx=and(#u8,asl(Rx,#U5))
+def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = and(#$src1, asl($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (and (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+//Rx=and(#u8,lsr(Rx,#U5))
+def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = and(#$src1, lsr($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (and (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+//Rx=or(#u8,asl(Rx,#U5))
+def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = or(#$src1, asl($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (or (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+//Rx=or(#u8,lsr(Rx,#U5))
+def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+            (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+            "$dst = or(#$src1, lsr($src2, #$src3))",
+            [(set IntRegs:$dst,
+             (or (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+            "$src2 = $dst">,
+            Requires<[HasV4T]>;
+
+
+//Shift by register.
+//Rd=lsl(#s6,Rt)
+def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
+            "$dst = lsl(#$src1, $src2)",
+            [(set IntRegs:$dst, (shl s6ImmPred:$src1, IntRegs:$src2))]>,
+            Requires<[HasV4T]>;
+
+
+//Shift by register and logical.
+//Rxx^=asl(Rss,Rt)
+def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+            (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+            "$dst ^= asl($src2, $src3)",
+            [(set DoubleRegs:$dst,
+             (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+//Rxx^=asr(Rss,Rt)
+def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+            (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+            "$dst ^= asr($src2, $src3)",
+            [(set DoubleRegs:$dst,
+             (xor DoubleRegs:$src1, (sra DoubleRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+//Rxx^=lsl(Rss,Rt)
+def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+            (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+            "$dst ^= lsl($src2, $src3)",
+            [(set DoubleRegs:$dst,
+             (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+//Rxx^=lsr(Rss,Rt)
+def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+            (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+            "$dst ^= lsr($src2, $src3)",
+            [(set DoubleRegs:$dst,
+             (xor DoubleRegs:$src1, (srl DoubleRegs:$src2, IntRegs:$src3)))],
+            "$src1 = $dst">,
+            Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word
+//
+// Implemented:
+// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
+// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
+//
+// Not implemented:
+// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMw_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMw_ADDi_indexed_V4 and
+// MEMw_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
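+// (The m6Imm operand apparently encodes a signed addend covering both
+// directions: a non-negative value would become the "+=" form below and a
+// negative one the "-=" form.)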
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
+            "Error; should not emit",
+            [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         m6ImmPred:$addend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
+            "memw($base+#$offset) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
+            "memw($base+#$offset) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
+            "memw($base+#$offset) += $addend",
+            [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         IntRegs:$addend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
+            "memw($base+#$offset) -= $subend",
+            [(store (sub (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         IntRegs:$subend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
+            "memw($base+#$offset) &= $andend",
+            [(store (and (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                         IntRegs:$andend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
+            "memw($base+#$offset) |= $orend",
+            [(store (or (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+                        IntRegs:$orend),
+                    (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// MEMw_ADDSUBi_V4:
+// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4;
+// a later pass will change it to the right pattern.
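+// (E.g. an addend of -3 would presumably be rewritten to "memw($addr) -= #3"
+// and an addend of 3 to "memw($addr) += #3".)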
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, m6Imm:$addend),
+            "Error; should not emit",
+            [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
+                    ADDRriU6_2:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, u5Imm:$addend),
+            "memw($addr) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, u5Imm:$subend),
+            "memw($addr) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$addend),
+            "memw($addr) += $addend",
+            [(store (add (load ADDRriU6_2:$addr), IntRegs:$addend),
+                    ADDRriU6_2:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$subend),
+            "memw($addr) -= $subend",
+            [(store (sub (load ADDRriU6_2:$addr), IntRegs:$subend),
+                    ADDRriU6_2:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$andend),
+            "memw($addr) &= $andend",
+            [(store (and (load ADDRriU6_2:$addr), IntRegs:$andend),
+                    ADDRriU6_2:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$orend),
+            "memw($addr) |= $orend",
+            [(store (or (load ADDRriU6_2:$addr), IntRegs:$orend),
+                    ADDRriU6_2:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Halfword
+//
+// Implemented:
+// MEMh_ADDi_indexed_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_indexed_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_indexed_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_indexed_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_indexed_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_indexed_V4 : memh(Rs+#u6:1)|=Rt
+// MEMh_ADDi_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_V4 : memh(Rs+#u6:1)|=Rt
+//
+// Not implemented:
+// MEMh_CLRi_indexed_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_indexed_V4 : memh(Rs+#u6:1)=setbit(#U5)
+// MEMh_CLRi_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_V4 : memh(Rs+#u6:1)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMh_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMh_ADDi_indexed_V4 and
+// MEMh_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
+            "Error; should not emit",
+            [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 m6ImmPred:$addend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
+            "memh($base+#$offset) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
+            "memh($base+#$offset) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
+            "memh($base+#$offset) += $addend",
+            [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 IntRegs:$addend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
+            "memh($base+#$offset) -= $subend",
+            [(truncstorei16 (sub (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 IntRegs:$subend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
+            "memh($base+#$offset) &= $andend",
+            [(truncstorei16 (and (sextloadi16 (add IntRegs:$base,
+                                                   u6_1ImmPred:$offset)),
+                                 IntRegs:$andend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
+            "memh($base+#$offset) |= $orend",
+            [(truncstorei16 (or (sextloadi16 (add IntRegs:$base,
+                                                  u6_1ImmPred:$offset)),
+                                IntRegs:$orend),
+                            (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// MEMh_ADDSUBi_V4:
+// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, m6Imm:$addend),
+            "Error; should not emit",
+            [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+                                 m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, u5Imm:$addend),
+            "memh($addr) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, u5Imm:$subend),
+            "memh($addr) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$addend),
+            "memh($addr) += $addend",
+            [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+                                 IntRegs:$addend), ADDRriU6_1:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$subend),
+            "memh($addr) -= $subend",
+            [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
+                                 IntRegs:$subend), ADDRriU6_1:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$andend),
+            "memh($addr) &= $andend",
+            [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
+                                 IntRegs:$andend), ADDRriU6_1:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$orend),
+            "memh($addr) |= $orend",
+            [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
+                                IntRegs:$orend), ADDRriU6_1:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Byte
+//
+// Implemented:
+// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
+// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
+//
+// Not implemented:
+// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMb_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMb_ADDi_indexed_V4 and
+// MEMb_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
+            "Error; should not emit",
+            [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+                                                 u6_0ImmPred:$offset)),
+                                m6ImmPred:$addend),
+                           (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
+            "memb($base+#$offset) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
+            "memb($base+#$offset) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
+            "memb($base+#$offset) += $addend",
+            [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+                                                 u6_0ImmPred:$offset)),
+                                IntRegs:$addend),
+                           (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
+            "memb($base+#$offset) -= $subend",
+            [(truncstorei8 (sub (sextloadi8 (add IntRegs:$base,
+                                                 u6_0ImmPred:$offset)),
+                                IntRegs:$subend),
+                           (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
+            "memb($base+#$offset) &= $andend",
+            [(truncstorei8 (and (sextloadi8 (add IntRegs:$base,
+                                                 u6_0ImmPred:$offset)),
+                                IntRegs:$andend),
+                           (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+            (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
+            "memb($base+#$offset) |= $orend",
+            [(truncstorei8 (or (sextloadi8 (add IntRegs:$base,
+                                                u6_0ImmPred:$offset)),
+                               IntRegs:$orend),
+                           (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// MEMb_ADDSUBi_V4:
+// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, m6Imm:$addend),
+            "Error; should not emit",
+            [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+                                m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, u5Imm:$addend),
+            "memb($addr) += $addend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, u5Imm:$subend),
+            "memb($addr) -= $subend",
+            []>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$addend),
+            "memb($addr) += $addend",
+            [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+                                IntRegs:$addend), ADDRriU6_0:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$subend),
+            "memb($addr) -= $subend",
+            [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
+                                IntRegs:$subend), ADDRriU6_0:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$andend),
+            "memb($addr) &= $andend",
+            [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
+                                IntRegs:$andend), ADDRriU6_0:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
+            (ins MEMri:$addr, IntRegs:$orend),
+            "memb($addr) |= $orend",
+            [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
+                               IntRegs:$orend), ADDRriU6_0:$addr)]>,
+            Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors like GE/GEU/LT/LTU/LE/LEU are not supported by
+// the hardware. However, the compiler can still implement them by combining
+// the implemented flavors appropriately.
+// Implemented patterns: EQ/GT/GTU.
+// Missing patterns: GE/GEU/LT/LTU/LE/LEU.
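+// For example, a byte setlt can be recovered from the implemented GT flavor
+// by swapping the operands. A purely illustrative, hypothetical sketch,
+// mirroring the CMPbGTrr_V4 pattern defined below:
+//
+//   def : Pat<(i1 (setlt (shl IntRegs:$src1, (i32 24)),
+//                        (shl IntRegs:$src2, (i32 24)))),
+//             (CMPbGTrr_V4 IntRegs:$src2, IntRegs:$src1)>;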
+ +// Pd=cmpb.eq(Rs,#u8) +let isCompare = 1 in +def CMPbEQri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u8Imm:$src2), + "$dst = cmpb.eq($src1, #$src2)", + [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 255), + u8ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.eq(Rs,Rt) +let isCompare = 1 in +def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1, + IntRegs:$src2), + 255), + 0))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.eq(Rs,Rt) +let isCompare = 1 in +def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 24)), + (shl IntRegs:$src2, (i32 24))))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gt(Rs,#s8) +let isCompare = 1 in +def CMPbGTri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, s32Imm:$src2), + "$dst = cmpb.gt($src1, #$src2)", + [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)), + s32_24ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gt(Rs,Rt) +let isCompare = 1 in +def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.gt($src1, $src2)", + [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)), + (shl IntRegs:$src2, (i32 24))))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gtu(Rs,#u7) +let isCompare = 1 in +def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u7Imm:$src2), + "$dst = cmpb.gtu($src1, #$src2)", + [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255), + u7ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Pd=cmpb.gtu(Rs,Rt) +let isCompare = 1 in +def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmpb.gtu($src1, $src2)", + [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255), + (and IntRegs:$src2, 255)))]>, + Requires<[HasV4T]>; + +// Signed half compare(.eq) ri. +// Pd=cmph.eq(Rs,#s8) +let isCompare = 1 in +def CMPhEQri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, u16Imm:$src2), + "$dst = cmph.eq($src1, #$src2)", + [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 65535), + u16_s8ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Signed half compare(.eq) rr. +// Case 1: xor + and, then compare: +// r0=xor(r0,r1) +// r0=and(r0,#0xffff) +// p0=cmp.eq(r0,#0) +// Pd=cmph.eq(Rs,Rt) +let isCompare = 1 in +def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1, + IntRegs:$src2), + 65535), + 0))]>, + Requires<[HasV4T]>; + +// Signed half compare(.eq) rr. +// Case 2: shift left 16 bits then compare: +// r0=asl(r0,16) +// r1=asl(r1,16) +// p0=cmp.eq(r0,r1) +// Pd=cmph.eq(Rs,Rt) +let isCompare = 1 in +def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = cmph.eq($src1, $src2)", + [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 16)), + (shl IntRegs:$src2, (i32 16))))]>, + Requires<[HasV4T]>; + +// Signed half compare(.gt) ri. +// Pd=cmph.gt(Rs,#s8) +let isCompare = 1 in +def CMPhGTri_V4 : MInst<(outs PredRegs:$dst), + (ins IntRegs:$src1, s32Imm:$src2), + "$dst = cmph.gt($src1, #$src2)", + [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)), + s32_16s8ImmPred:$src2))]>, + Requires<[HasV4T]>; + +// Signed half compare(.gt) rr. 
+// Pd=cmph.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2),
+            "$dst = cmph.gt($src1, $src2)",
+            [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
+                                        (shl IntRegs:$src2, (i32 16))))]>,
+            Requires<[HasV4T]>;
+
+// Unsigned half compare rr (.gtu).
+// Pd=cmph.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2),
+            "$dst = cmph.gtu($src1, $src2)",
+            [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+                                         (and IntRegs:$src2, 65535)))]>,
+            Requires<[HasV4T]>;
+
+// Unsigned half compare ri (.gtu).
+// Pd=cmph.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
+            (ins IntRegs:$src1, u7Imm:$src2),
+            "$dst = cmph.gtu($src1, #$src2)",
+            [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+                                         u7ImmPred:$src2))]>,
+            Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+// Deallocate frame and return.
+// dealloc_return
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+  Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+  def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1),
+            "dealloc_return",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+  Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+  def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1),
+            "if ($src1) dealloc_return",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+  Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+  def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+            i32imm:$amt1),
+            "if (!$src1) dealloc_return",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+  Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+  def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+            i32imm:$amt1),
+            "if ($src1.new) dealloc_return:nt",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+  Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+  def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+            i32imm:$amt1),
+            "if (!$src1.new) dealloc_return:nt",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+  Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+  def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+            i32imm:$amt1),
+            "if ($src1.new) dealloc_return:t",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+  Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+  def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+            i32imm:$amt1),
+            "if (!$src1.new) dealloc_return:t",
+            []>,
+            Requires<[HasV4T]>;
+}
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 0000000..1328eba
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,3462 @@
+//===- HexagonIntrinsics.td - Instruction intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler
Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V2 Architecture +// Application-Level Specification +// 80-V9418-8 Rev. B +// March 4, 2008 +//===----------------------------------------------------------------------===// + +// +// ALU 32 types. +// + +class qi_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_siu8<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_ALU32_siu9<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_qisisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_ALU32_qis8si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, + IntRegs:$src3))]>; + +class si_ALU32_qisis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_ALU32_qis8s8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; + +class si_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sisi_sat<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sisi_rnd<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set IntRegs:$dst, 
(IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU32_sis16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_s10si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), + [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; + +class si_lo_ALU32_siu16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), + !strconcat("$dst.l = ", !strconcat(opc , "#$src2")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_hi_ALU32_siu16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2), + !strconcat("$dst.h = ", !strconcat(opc , "#$src2")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_s16<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1")), + [(set IntRegs:$dst, (IntID imm:$src1))]>; + +class di_ALU32_s8<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1), + !strconcat("$dst = ", !strconcat(opc , "#$src1")), + [(set DoubleRegs:$dst, (IntID imm:$src1))]>; + +class di_ALU64_di<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_ALU32_si<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_ALU32_si_tfr<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +// +// ALU 64 types. 
+// + +class si_ALU64_si_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_ALU64_sidi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; + +class di_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_qididi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2, + DoubleRegs:$src3))]>; + +class di_ALU64_sisi<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_ALU64_didi_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_rnd<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_crnd<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class qi_ALU64_didi<string opc, Intrinsic IntID> + : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_ALU64_sisi<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins 
IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):sat:<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")), + [(set 
IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_lh<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_ll<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_ALU64_sisi_sat<string opc, Intrinsic IntID> + : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +// +// SInst classes. +// + +class qi_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class qi_SInst_qi_pxfer<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class qi_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_qiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class di_SInst_di_sat<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_SInst_di<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; + +class si_SInst_di_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>; + +class di_SInst_disi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc 
, "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_didi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1), + !strconcat("$dst = ", !strconcat(opc , "($src1)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>; + +class si_SInst_sisiu3<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_SInst_diu5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_disi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class si_SInst_sidi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>; + +class di_SInst_disisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_siu5<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_SInst_siu6<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_si_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class di_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "($src)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_qi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), 
(ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set IntRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_si<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src), + !strconcat("$dst = ", !strconcat(opc , "$src")), + [(set PredRegs:$dst, (IntID IntRegs:$src))]>; + +class si_SInst_sisi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_SInst_diu6<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5_rnd<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_SInst_siu5u5<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>; + +class si_SInst_sisisi_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5u5<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class si_SInst_sisidi<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + DoubleRegs:$src2), + 
!strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6u6<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2, u6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class di_SInst_dididi<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_diu6u6<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2, + u6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2, + imm:$src3))]>; + +class di_SInst_didisi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_didiqi<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + IntRegs:$src3))]>; + +class di_SInst_didiu3<string opc, Intrinsic IntID> + : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2, + u3Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2, + imm:$src3))]>; + +class di_SInst_didisi_or<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didisi_and<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_and<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_or<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_xor<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), + [(set 
DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_and<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisisi_or<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + + +class si_SInst_sisiu5_and<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_or<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_xor<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class si_SInst_sisiu5_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u5Imm:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_acc<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, imm:$src2))], + "$dst2 = $dst">; + +class di_SInst_didiu6_nac<string opc, Intrinsic IntID> + : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + u6Imm:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + imm:$src2))], + "$dst2 = $dst">; + + +// +// MInst classes.
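+// Illustrative instantiation (hypothetical def; names assumed): the
+// half-word multiply wrappers below are bound per intrinsic, e.g.
+//
+//   def Hexagon_M2_mpy_rnd_ll_s0 : si_MInst_sisi_rnd_ll<"mpy",
+//                                          int_hexagon_M2_mpy_rnd_ll_s0>;
+//
+// would print as "$dst = mpy($src1.L, $src2.L):rnd".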
+// + +class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):<<1:rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):rnd")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_disisi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), 
(ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_s8s8<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>; + +class si_MInst_sisi<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_lh<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hl<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), + 
[(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_ll<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + + +class si_MInst_sisi_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_up<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", 
!strconcat(opc , + "($src1, $src2*):<<1:sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class si_SInst_sisi_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2*):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2*):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisisi_xacc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst += ", 
!strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + IntRegs:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisis8_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisis8_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + s8Imm:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu4u5<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + u4Imm:$src2, u5Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, #$src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + imm:$src2, imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu8_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + u8Imm:$src3), + !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisiu8_nac<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2, + u8Imm:$src3), + !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2, + imm:$src3))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, 
IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID 
IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], 
+ "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + 
IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, + DoubleRegs:$src2))]>; + +class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")), + [(set IntRegs:$dst, (IntID 
IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.H):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.L, $src2.L):<<1:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , + "($src1.H, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID> + 
: MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, + DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc<string opc, Intrinsic IntID> + : MInst_acc<(outs 
DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + 
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.H, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.H, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.L, $src2.H):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst -= ", + !strconcat(opc , "($src1.L, $src2.L):<<1")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class di_MInst_disi_s1_sat<string opc, Intrinsic IntID> + : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + IntRegs:$src2), + !strconcat("$dst += ", + !strconcat(opc , "($src1, $src2):<<1:sat")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, + DoubleRegs:$src1, + IntRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", + !strconcat(opc , "($src1, $src2):<<1:rnd:sat")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>; + +class 
si_MInst_didi<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Add. +def Hexagon_A2_add: + si_ALU32_sisi <"add", int_hexagon_A2_add>; +def Hexagon_A2_addi: + si_ALU32_sis16 <"add", int_hexagon_A2_addi>; + +// ALU32 / ALU / Logical operations. +def Hexagon_A2_and: + si_ALU32_sisi <"and", int_hexagon_A2_and>; +def Hexagon_A2_andir: + si_ALU32_sis10 <"and", int_hexagon_A2_andir>; +def Hexagon_A2_not: + si_ALU32_si <"not", int_hexagon_A2_not>; +def Hexagon_A2_or: + si_ALU32_sisi <"or", int_hexagon_A2_or>; +def Hexagon_A2_orir: + si_ALU32_sis10 <"or", int_hexagon_A2_orir>; +def Hexagon_A2_xor: + si_ALU32_sisi <"xor", int_hexagon_A2_xor>; + +// ALU32 / ALU / Negate. +def Hexagon_A2_neg: + si_ALU32_si <"neg", int_hexagon_A2_neg>; + +// ALU32 / ALU / Subtract. +def Hexagon_A2_sub: + si_ALU32_sisi <"sub", int_hexagon_A2_sub>; +def Hexagon_A2_subri: + si_ALU32_s10si <"sub", int_hexagon_A2_subri>; + +// ALU32 / ALU / Transfer Immediate. +def Hexagon_A2_tfril: + si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>; +def Hexagon_A2_tfrih: + si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>; +def Hexagon_A2_tfrsi: + si_ALU32_s16 <"", int_hexagon_A2_tfrsi>; +def Hexagon_A2_tfrpi: + di_ALU32_s8 <"", int_hexagon_A2_tfrpi>; + +// ALU32 / ALU / Transfer Register. +def Hexagon_A2_tfr: + si_ALU32_si_tfr <"", int_hexagon_A2_tfr>; + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// ALU32 / PERM / Combine. +def Hexagon_A2_combinew: + di_ALU32_sisi <"combine", int_hexagon_A2_combinew>; +def Hexagon_A2_combine_hh: + si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>; +def Hexagon_A2_combine_lh: + si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>; +def Hexagon_A2_combine_hl: + si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>; +def Hexagon_A2_combine_ll: + si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>; +def Hexagon_A2_combineii: + di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>; + +// ALU32 / PERM / Mux. +def Hexagon_C2_mux: + si_ALU32_qisisi <"mux", int_hexagon_C2_mux>; +def Hexagon_C2_muxri: + si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>; +def Hexagon_C2_muxir: + si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>; +def Hexagon_C2_muxii: + si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>; + +// ALU32 / PERM / Shift halfword. +def Hexagon_A2_aslh: + si_ALU32_si <"aslh", int_hexagon_A2_aslh>; +def Hexagon_A2_asrh: + si_ALU32_si <"asrh", int_hexagon_A2_asrh>; +def SI_to_SXTHI_asrh: + si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>; + +// ALU32 / PERM / Sign/zero extend. +def Hexagon_A2_sxth: + si_ALU32_si <"sxth", int_hexagon_A2_sxth>; +def Hexagon_A2_sxtb: + si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>; +def Hexagon_A2_zxth: + si_ALU32_si <"zxth", int_hexagon_A2_zxth>; +def Hexagon_A2_zxtb: + si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>; + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// ALU32 / PRED / Compare. 
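The qi_* classes instantiated below return their result in a predicate register: a compare produces a predicate that conditional operations such as mux then consume. A minimal C++ sketch of that dataflow, using hypothetical helper names; an illustration of the documented semantics, not how the backend models predicate registers:

  #include <cstdint>
  // cmp.eq writes a predicate; mux reads it (names here are hypothetical).
  static inline bool c2_cmpeq(int32_t rs, int32_t rt) { return rs == rt; }
  static inline int32_t c2_mux(bool pu, int32_t rs, int32_t rt) {
    return pu ? rs : rt;  // Rd = mux(Pu, Rs, Rt)
  }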
+def Hexagon_C2_cmpeq:
+  qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
+def Hexagon_C2_cmpeqi:
+  qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
+def Hexagon_C2_cmpgei:
+  qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
+def Hexagon_C2_cmpgeui:
+  qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
+def Hexagon_C2_cmpgt:
+  qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
+def Hexagon_C2_cmpgti:
+  qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
+def Hexagon_C2_cmpgtu:
+  qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
+def Hexagon_C2_cmpgtui:
+  qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
+def Hexagon_C2_cmplt:
+  qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
+def Hexagon_C2_cmpltu:
+  qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+
+// ALU32 / VH / Vector add halfwords.
+// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
+def Hexagon_A2_svaddh:
+  si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
+def Hexagon_A2_svaddhs:
+  si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
+def Hexagon_A2_svadduhs:
+  si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
+
+// ALU32 / VH / Vector average halfwords.
+def Hexagon_A2_svavgh:
+  si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
+def Hexagon_A2_svavghs:
+  si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
+def Hexagon_A2_svnavgh:
+  si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
+
+// ALU32 / VH / Vector subtract halfwords.
+def Hexagon_A2_svsubh:
+  si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
+def Hexagon_A2_svsubhs:
+  si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
+def Hexagon_A2_svsubuhs:
+  si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addp:
+  di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def Hexagon_A2_addsat:
+  si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// Even though the definition says hl, it should be lh,
+// so DON'T change the class si_ALU64_sisi_l16_lh that it inherits.
+def Hexagon_A2_addh_l16_hl:
+  si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def Hexagon_A2_addh_l16_ll:
+  si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def Hexagon_A2_addh_l16_sat_hl:
+  si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def Hexagon_A2_addh_l16_sat_ll:
+  si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def Hexagon_A2_addh_h16_hh:
+  si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def Hexagon_A2_addh_h16_hl:
+  si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def Hexagon_A2_addh_h16_lh:
+  si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def Hexagon_A2_addh_h16_ll:
+  si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def Hexagon_A2_addh_h16_sat_hh:
+  si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def Hexagon_A2_addh_h16_sat_hl:
+  si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def Hexagon_A2_addh_h16_sat_lh:
+  si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def Hexagon_A2_addh_h16_sat_ll:
+  si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def Hexagon_C2_cmpeqp:
+  qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def Hexagon_C2_cmpgtp:
+  qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def Hexagon_C2_cmpgtup:
+  qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def Hexagon_A2_andp:
+  di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def Hexagon_A2_orp:
+  di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def Hexagon_A2_xorp:
+  di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def Hexagon_A2_max:
+  si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def Hexagon_A2_maxu:
+  si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def Hexagon_A2_min:
+  si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def Hexagon_A2_minu:
+  si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def Hexagon_A2_subp:
+  di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def Hexagon_A2_subsat:
+  si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// Even though the definition says hl, it should be lh,
+// so DON'T change the class si_ALU64_sisi_l16_lh that it inherits.
+def Hexagon_A2_subh_l16_hl:
+  si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
+def Hexagon_A2_subh_l16_ll:
+  si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
+
+def Hexagon_A2_subh_l16_sat_hl:
+  si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
+def Hexagon_A2_subh_l16_sat_ll:
+  si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
+
+def Hexagon_A2_subh_h16_hh:
+  si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
+def Hexagon_A2_subh_h16_hl:
+  si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
+def Hexagon_A2_subh_h16_lh:
+  si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
+def Hexagon_A2_subh_h16_ll:
+  si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
+
+def Hexagon_A2_subh_h16_sat_hh:
+  si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
+def Hexagon_A2_subh_h16_sat_hl:
+  si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
+def Hexagon_A2_subh_h16_sat_lh:
+  si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
+def Hexagon_A2_subh_h16_sat_ll:
+  si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
+
+// ALU64 / ALU / Transfer register.
+def Hexagon_A2_tfrp:
+  di_ALU64_di <"", int_hexagon_A2_tfrp>;
+
+/********************************************************************
+* ALU64/BIT *
+*********************************************************************/
+
+// ALU64 / BIT / Masked parity.
+def Hexagon_S2_parityp:
+  si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+
+/********************************************************************
+* ALU64/PERM *
+*********************************************************************/
+
+// ALU64 / PERM / Vector pack high and low halfwords.
+def Hexagon_S2_packhl:
+  di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+
+// ALU64 / VB / Vector add unsigned bytes.
+def Hexagon_A2_vaddub:
+  di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
+def Hexagon_A2_vaddubs:
+  di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
+
+// ALU64 / VB / Vector average unsigned bytes.
+def Hexagon_A2_vavgub:
+  di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
+def Hexagon_A2_vavgubr:
+  di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
+
+// ALU64 / VB / Vector compare unsigned bytes.
+def Hexagon_A2_vcmpbeq:
+  qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
+def Hexagon_A2_vcmpbgtu:
+  qi_ALU64_didi <"vcmpb.gtu", int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 / VB / Vector maximum/minimum unsigned bytes.
+def Hexagon_A2_vmaxub:
+  di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
+def Hexagon_A2_vminub:
+  di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
+
+// ALU64 / VB / Vector subtract unsigned bytes.
+def Hexagon_A2_vsubub:
+  di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
+def Hexagon_A2_vsububs:
+  di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+
+// ALU64 / VB / Vector mux.
+def Hexagon_C2_vmux:
+  di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+
+
+/********************************************************************
+* ALU64/VH *
+*********************************************************************/
+
+// ALU64 / VH / Vector add halfwords.
+// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddh:
+  di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def Hexagon_A2_vaddhs:
+  di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def Hexagon_A2_vadduhs:
+  di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd|:crnd][:sat]
+def Hexagon_A2_vavgh:
+  di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def Hexagon_A2_vavghcr:
+  di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def Hexagon_A2_vavghr:
+  di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def Hexagon_A2_vavguh:
+  di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def Hexagon_A2_vavguhr:
+  di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def Hexagon_A2_vnavgh:
+  di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def Hexagon_A2_vnavghcr:
+  di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def Hexagon_A2_vnavghr:
+  di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def Hexagon_A2_vcmpheq:
+  qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def Hexagon_A2_vcmphgt:
+  qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def Hexagon_A2_vcmphgtu:
+  qi_ALU64_didi <"vcmph.gtu", int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def Hexagon_A2_vmaxh:
+  di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def Hexagon_A2_vmaxuh:
+  di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def Hexagon_A2_vminh:
+  di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def Hexagon_A2_vminuh:
+  di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def Hexagon_A2_vsubh:
+  di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def Hexagon_A2_vsubhs:
+  di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def Hexagon_A2_vsubuhs:
+  di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd64=vaddw(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddw:
+  di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def Hexagon_A2_vaddws:
+  di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
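The :rnd averaging variants add one before the final shift, while the plain forms truncate. A per-lane C++ sketch of the two flavors for word lanes, assuming the usual elementwise semantics; an illustration, not the backend's implementation:

  #include <cstdint>
  // One 32-bit lane of vavgw vs. vavgw:rnd; a pair register holds two lanes.
  static inline int32_t avgw(int32_t a, int32_t b) {
    return (int32_t)(((int64_t)a + b) >> 1);      // truncating average
  }
  static inline int32_t avgwr(int32_t a, int32_t b) {
    return (int32_t)(((int64_t)a + b + 1) >> 1);  // :rnd biases up before the shift
  }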
+def Hexagon_A2_vavguw: + di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>; +def Hexagon_A2_vavguwr: + di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>; +def Hexagon_A2_vavgw: + di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>; +def Hexagon_A2_vavgwcr: + di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>; +def Hexagon_A2_vavgwr: + di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>; +def Hexagon_A2_vnavgw: + di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>; +def Hexagon_A2_vnavgwcr: + di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>; +def Hexagon_A2_vnavgwr: + di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>; + +// ALU64 / VW / Vector compare words. +def Hexagon_A2_vcmpweq: + qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>; +def Hexagon_A2_vcmpwgt: + qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>; +def Hexagon_A2_vcmpwgtu: + qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>; + +// ALU64 / VW / Vector maximum words. +def Hexagon_A2_vmaxw: + di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>; +def Hexagon_A2_vmaxuw: + di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>; + +// ALU64 / VW / Vector minimum words. +def Hexagon_A2_vminw: + di_ALU64_didi <"vminw", int_hexagon_A2_vminw>; +def Hexagon_A2_vminuw: + di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>; + +// ALU64 / VW / Vector subtract words. +def Hexagon_A2_vsubw: + di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>; +def Hexagon_A2_vsubws: + di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>; + + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Logical reductions on predicates. +def Hexagon_C2_all8: + qi_SInst_qi <"all8", int_hexagon_C2_all8>; +def Hexagon_C2_any8: + qi_SInst_qi <"any8", int_hexagon_C2_any8>; + +// CR / Logical operations on predicates. +def Hexagon_C2_pxfer_map: + qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>; +def Hexagon_C2_and: + qi_SInst_qiqi <"and", int_hexagon_C2_and>; +def Hexagon_C2_andn: + qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>; +def Hexagon_C2_not: + qi_SInst_qi <"not", int_hexagon_C2_not>; +def Hexagon_C2_or: + qi_SInst_qiqi <"or", int_hexagon_C2_or>; +def Hexagon_C2_orn: + qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>; +def Hexagon_C2_xor: + qi_SInst_qiqi <"xor", int_hexagon_C2_xor>; + + +/******************************************************************** +* MTYPE/ALU * +*********************************************************************/ + +// MTYPE / ALU / Add and accumulate. +def Hexagon_M2_acci: + si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>; +def Hexagon_M2_accii: + si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>; +def Hexagon_M2_nacci: + si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>; +def Hexagon_M2_naccii: + si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>; + +// MTYPE / ALU / Subtract and accumulate. +def Hexagon_M2_subacc: + si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>; + +// MTYPE / ALU / Vector absolute difference. +def Hexagon_M2_vabsdiffh: + di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>; +def Hexagon_M2_vabsdiffw: + di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>; + +// MTYPE / ALU / XOR and xor with destination. +def Hexagon_M2_xor_xacc: + si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>; + + +/******************************************************************** +* MTYPE/COMPLEX * +*********************************************************************/ + +// MTYPE / COMPLEX / Complex multiply. 
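cmpy treats each 32-bit source as a packed complex halfword pair, and the *_conj classes model the conjugate forms, which negate the imaginary half of the second operand. A rough C++ model of the unscaled product, assuming the real part sits in the low halfword and the imaginary part in the high halfword; an illustration only:

  #include <cstdint>
  struct Complex64 { int64_t re, im; };  // the two words of the Rdd result
  static Complex64 cmpy(int32_t rs, int32_t rt) {
    int32_t sr = (int16_t)rs, si = (int16_t)(rs >> 16);
    int32_t tr = (int16_t)rt, ti = (int16_t)(rt >> 16);
    return { (int64_t)sr * tr - (int64_t)si * ti,   // real part
             (int64_t)sr * ti + (int64_t)si * tr }; // imaginary part
  }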
+// Rdd[-+]=cmpy(Rs, Rt)[:<<1]:sat
+def Hexagon_M2_cmpys_s1:
+  di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def Hexagon_M2_cmpys_s0:
+  di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def Hexagon_M2_cmpysc_s1:
+  di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def Hexagon_M2_cmpysc_s0:
+  di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def Hexagon_M2_cmacs_s1:
+  di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def Hexagon_M2_cmacs_s0:
+  di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def Hexagon_M2_cmacsc_s1:
+  di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def Hexagon_M2_cmacsc_s0:
+  di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
+
+def Hexagon_M2_cnacs_s1:
+  di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def Hexagon_M2_cnacs_s0:
+  di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def Hexagon_M2_cnacsc_s1:
+  di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def Hexagon_M2_cnacsc_s0:
+  di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def Hexagon_M2_cmpyr_s0:
+  di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def Hexagon_M2_cmacr_s0:
+  di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def Hexagon_M2_cmpyi_s0:
+  di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def Hexagon_M2_cmaci_s0:
+  di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rd32=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def Hexagon_M2_cmpyrs_s0:
+  si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def Hexagon_M2_cmpyrs_s1:
+  si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def Hexagon_M2_cmpyrsc_s0:
+  si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def Hexagon_M2_cmpyrsc_s1:
+  si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+// MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def Hexagon_M2_vcmpy_s0_sat_i:
+  di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def Hexagon_M2_vcmpy_s1_sat_i:
+  di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def Hexagon_M2_vcmpy_s0_sat_r:
+  di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def Hexagon_M2_vcmpy_s1_sat_r:
+  di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def Hexagon_M2_vcmac_s0_sat_i:
+  di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def Hexagon_M2_vcmac_s0_sat_r:
+  di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpyi_s0: + di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>; +def Hexagon_M2_vrcmpyr_s0: + di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>; + +def Hexagon_M2_vrcmpyi_s0c: + di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>; +def Hexagon_M2_vrcmpyr_s0c: + di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>; + +def Hexagon_M2_vrcmaci_s0: + di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>; +def Hexagon_M2_vrcmacr_s0: + di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>; + +def Hexagon_M2_vrcmaci_s0c: + di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>; +def Hexagon_M2_vrcmacr_s0c: + di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>; + + +/******************************************************************** +* MTYPE/MPYH * +*********************************************************************/ + +// MTYPE / MPYH / Multiply and use lower result. +//def Hexagon_M2_mpysmi: +// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>; +def Hexagon_M2_mpyi: + si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>; +def Hexagon_M2_mpyui: + si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>; +def Hexagon_M2_macsip: + si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>; +def Hexagon_M2_maci: + si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>; +def Hexagon_M2_macsin: + si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>; + +// MTYPE / MPYH / Multiply word by half (32x16). +//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat] +//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat] +def Hexagon_M2_mmpyl_rs1: + di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>; +def Hexagon_M2_mmpyl_s1: + di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>; +def Hexagon_M2_mmpyl_rs0: + di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>; +def Hexagon_M2_mmpyl_s0: + di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>; +def Hexagon_M2_mmpyh_rs1: + di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>; +def Hexagon_M2_mmpyh_s1: + di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>; +def Hexagon_M2_mmpyh_rs0: + di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>; +def Hexagon_M2_mmpyh_s0: + di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>; +def Hexagon_M2_mmacls_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>; +def Hexagon_M2_mmacls_s1: + di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>; +def Hexagon_M2_mmacls_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>; +def Hexagon_M2_mmacls_s0: + di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>; +def Hexagon_M2_mmachs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>; +def Hexagon_M2_mmachs_s1: + di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>; +def Hexagon_M2_mmachs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>; +def Hexagon_M2_mmachs_s0: + di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>; + +// MTYPE / MPYH / Multiply word by unsigned half (32x16). 
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat] +//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat] +def Hexagon_M2_mmpyul_rs1: + di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>; +def Hexagon_M2_mmpyul_s1: + di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>; +def Hexagon_M2_mmpyul_rs0: + di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>; +def Hexagon_M2_mmpyul_s0: + di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>; +def Hexagon_M2_mmpyuh_rs1: + di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>; +def Hexagon_M2_mmpyuh_s1: + di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>; +def Hexagon_M2_mmpyuh_rs0: + di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>; +def Hexagon_M2_mmpyuh_s0: + di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>; +def Hexagon_M2_mmaculs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>; +def Hexagon_M2_mmaculs_s1: + di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>; +def Hexagon_M2_mmaculs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>; +def Hexagon_M2_mmaculs_s0: + di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>; +def Hexagon_M2_mmacuhs_rs1: + di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>; +def Hexagon_M2_mmacuhs_s1: + di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>; +def Hexagon_M2_mmacuhs_rs0: + di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>; +def Hexagon_M2_mmacuhs_s0: + di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>; + +// MTYPE / MPYH / Multiply and use upper result. +def Hexagon_M2_hmmpyh_rs1: + si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>; +def Hexagon_M2_hmmpyl_rs1: + si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>; +def Hexagon_M2_mpy_up: + si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>; +def Hexagon_M2_dpmpyss_rnd_s0: + si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>; +def Hexagon_M2_mpyu_up: + si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>; + +// MTYPE / MPYH / Multiply and use full result. +def Hexagon_M2_dpmpyuu_s0: + di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>; +def Hexagon_M2_dpmpyuu_acc_s0: + di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>; +def Hexagon_M2_dpmpyuu_nac_s0: + di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>; +def Hexagon_M2_dpmpyss_s0: + di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>; +def Hexagon_M2_dpmpyss_acc_s0: + di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>; +def Hexagon_M2_dpmpyss_nac_s0: + di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>; + + +/******************************************************************** +* MTYPE/MPYS * +*********************************************************************/ + +// MTYPE / MPYS / Scalar 16x16 multiply signed. 
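Each def in the group below picks one point in the modifier space spelled out in the comment: choose a halfword from each source, multiply, optionally shift the product left by one (:<<1), and optionally saturate the 32-bit result (:sat). A C++ sketch of one representative variant, with the saturation helper written out; a model of the documented semantics, not the backend code:

  #include <cstdint>
  static inline int32_t sat32(int64_t v) {
    if (v > INT32_MAX) return INT32_MAX;  // clamp on overflow
    if (v < INT32_MIN) return INT32_MIN;
    return (int32_t)v;
  }
  // Rd = mpy(Rs.L, Rt.H):<<1:sat
  static inline int32_t mpy_sat_lh_s1(int32_t rs, int32_t rt) {
    int64_t p = (int64_t)(int16_t)rs * (int16_t)(rt >> 16);
    return sat32(p << 1);                 // :<<1, then :sat
  }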
+// Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd|:sat|:rnd:sat]
+def Hexagon_M2_mpy_hh_s0:
+  si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
+def Hexagon_M2_mpy_hh_s1:
+  si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s1:
+  si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_rnd_hh_s1:
+  si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_hh_s1:
+  si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s0:
+  si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_rnd_hh_s0:
+  si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_hh_s0:
+  si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
+
+def Hexagon_M2_mpy_hl_s0:
+  si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
+def Hexagon_M2_mpy_hl_s1:
+  si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s1:
+  si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_rnd_hl_s1:
+  si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_hl_s1:
+  si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s0:
+  si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_rnd_hl_s0:
+  si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_hl_s0:
+  si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
+
+def Hexagon_M2_mpy_lh_s0:
+  si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
+def Hexagon_M2_mpy_lh_s1:
+  si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s1:
+  si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_rnd_lh_s1:
+  si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_lh_s1:
+  si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s0:
+  si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_rnd_lh_s0:
+  si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_lh_s0:
+  si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
+
+def Hexagon_M2_mpy_ll_s0:
+  si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
+def Hexagon_M2_mpy_ll_s1:
+  si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s1:
+  si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_rnd_ll_s1:
+  si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_ll_s1:
+  si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s0:
+  si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_rnd_ll_s0:
+  si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_ll_s0:
+  si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
+
+// Rdd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd]
+def Hexagon_M2_mpyd_hh_s0:
+  di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
+def Hexagon_M2_mpyd_hh_s1:
+  di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s1:
+  di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s0:
+  di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
+
+def Hexagon_M2_mpyd_hl_s0:
+  di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
+def Hexagon_M2_mpyd_hl_s1:
+  di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s1:
+  di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s0:
+  di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
+
+def Hexagon_M2_mpyd_lh_s0:
+  di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
+def Hexagon_M2_mpyd_lh_s1:
+  di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s1:
+  di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s0:
+  di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
+
+def Hexagon_M2_mpyd_ll_s0:
+  di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
+def Hexagon_M2_mpyd_ll_s1:
+  di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s1:
+  di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s0:
+  di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
+
+// Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_acc_hh_s0:
+  si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
+def Hexagon_M2_mpy_acc_hh_s1:
+  si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s1:
+  si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s0:
+  si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def Hexagon_M2_mpy_acc_hl_s0:
+  si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
+def Hexagon_M2_mpy_acc_hl_s1:
+  si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s1:
+  si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s0:
+  si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
+
+def Hexagon_M2_mpy_acc_lh_s0:
+  si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
+def Hexagon_M2_mpy_acc_lh_s1:
+  si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s1:
+  si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s0:
+  si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
+
+def Hexagon_M2_mpy_acc_ll_s0:
+  si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
+def Hexagon_M2_mpy_acc_ll_s1:
+  si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s1:
+  si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s0:
+  si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
+
+// Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_nac_hh_s0:
+  si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
+def Hexagon_M2_mpy_nac_hh_s1:
+  si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s1:
+  si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s0:
+  si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+def Hexagon_M2_mpy_nac_hl_s0:
+  si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
+def Hexagon_M2_mpy_nac_hl_s1:
+  si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s1:
+  si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s0:
+  si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
+
+def Hexagon_M2_mpy_nac_lh_s0:
+  si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
+def Hexagon_M2_mpy_nac_lh_s1:
+  si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s1:
+  si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s0:
+  si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
+
+def Hexagon_M2_mpy_nac_ll_s0:
+  si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
+def Hexagon_M2_mpy_nac_ll_s1:
+  si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s1:
+  si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s0:
+  si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
+
+// Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_acc_hh_s0:
+  di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
+def Hexagon_M2_mpyd_acc_hh_s1:
+  di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
+
+def Hexagon_M2_mpyd_acc_hl_s0:
+  di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
+def Hexagon_M2_mpyd_acc_hl_s1:
+  di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
+
+def Hexagon_M2_mpyd_acc_lh_s0:
+  di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
+def Hexagon_M2_mpyd_acc_lh_s1:
+  di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
+
+def Hexagon_M2_mpyd_acc_ll_s0:
+  di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
+def Hexagon_M2_mpyd_acc_ll_s1:
+  di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
+
+// Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_nac_hh_s0:
+  di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
+def Hexagon_M2_mpyd_nac_hh_s1:
+  di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
+
+def Hexagon_M2_mpyd_nac_hl_s0:
+  di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
+def Hexagon_M2_mpyd_nac_hl_s1:
+  di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
+
+def Hexagon_M2_mpyd_nac_lh_s0:
+  di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
+def Hexagon_M2_mpyd_nac_lh_s1:
+  di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
+
+def Hexagon_M2_mpyd_nac_ll_s0:
+  di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
+def Hexagon_M2_mpyd_nac_ll_s1:
+  di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
+
+// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
+// Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_hh_s0:
+  si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
+def Hexagon_M2_mpyu_hh_s1:
+  si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
+def Hexagon_M2_mpyu_hl_s0:
+  si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
+def Hexagon_M2_mpyu_hl_s1:
+  si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
+def Hexagon_M2_mpyu_lh_s0:
+  si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
+def Hexagon_M2_mpyu_lh_s1:
+  si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
+def Hexagon_M2_mpyu_ll_s0:
+  si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
+def Hexagon_M2_mpyu_ll_s1:
+  si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
+
+// Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_hh_s0:
+  di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
+def Hexagon_M2_mpyud_hh_s1:
+  di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
+def Hexagon_M2_mpyud_hl_s0:
+  di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
+def Hexagon_M2_mpyud_hl_s1:
+  di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
+def Hexagon_M2_mpyud_lh_s0:
+  di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
+def Hexagon_M2_mpyud_lh_s1:
+  di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
+def Hexagon_M2_mpyud_ll_s0:
+  di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
+def Hexagon_M2_mpyud_ll_s1:
+  di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
+
+// Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_acc_hh_s0:
+  si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
+def Hexagon_M2_mpyu_acc_hh_s1:
+  si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
+def Hexagon_M2_mpyu_acc_hl_s0:
+  si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
+def Hexagon_M2_mpyu_acc_hl_s1:
+  si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
+def Hexagon_M2_mpyu_acc_lh_s0:
+  si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
+def Hexagon_M2_mpyu_acc_lh_s1:
+  si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
+def Hexagon_M2_mpyu_acc_ll_s0:
+  si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
+def Hexagon_M2_mpyu_acc_ll_s1:
+  si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
+
+// Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_nac_hh_s0:
+  si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
+def Hexagon_M2_mpyu_nac_hh_s1:
+  si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
+def Hexagon_M2_mpyu_nac_hl_s0:
+  si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
+def Hexagon_M2_mpyu_nac_hl_s1:
+  si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
+def Hexagon_M2_mpyu_nac_lh_s0:
+  si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
+def Hexagon_M2_mpyu_nac_lh_s1:
+  si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
+def Hexagon_M2_mpyu_nac_ll_s0:
+  si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
+def Hexagon_M2_mpyu_nac_ll_s1:
+  si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
+
+// Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_acc_hh_s0:
+  di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
+def Hexagon_M2_mpyud_acc_hh_s1:
+  di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
+def Hexagon_M2_mpyud_acc_hl_s0:
+  di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
+def Hexagon_M2_mpyud_acc_hl_s1:
di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>; +def Hexagon_M2_mpyud_acc_lh_s0: + di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>; +def Hexagon_M2_mpyud_acc_lh_s1: + di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>; +def Hexagon_M2_mpyud_acc_ll_s0: + di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>; +def Hexagon_M2_mpyud_acc_ll_s1: + di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>; + +//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1] +def Hexagon_M2_mpyud_nac_hh_s0: + di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>; +def Hexagon_M2_mpyud_nac_hh_s1: + di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>; +def Hexagon_M2_mpyud_nac_hl_s0: + di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>; +def Hexagon_M2_mpyud_nac_hl_s1: + di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>; +def Hexagon_M2_mpyud_nac_lh_s0: + di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>; +def Hexagon_M2_mpyud_nac_lh_s1: + di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>; +def Hexagon_M2_mpyud_nac_ll_s0: + di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>; +def Hexagon_M2_mpyud_nac_ll_s1: + di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>; + + +/******************************************************************** +* MTYPE/VB * +*********************************************************************/ + +// MTYPE / VB / Vector reduce add unsigned bytes. +def Hexagon_A2_vraddub: + di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>; +def Hexagon_A2_vraddub_acc: + di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>; + +// MTYPE / VB / Vector sum of absolute differences unsigned bytes. +def Hexagon_A2_vrsadub: + di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>; +def Hexagon_A2_vrsadub_acc: + di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>; + +/******************************************************************** +* MTYPE/VH * +*********************************************************************/ + +// MTYPE / VH / Vector dual multiply. +def Hexagon_M2_vdmpys_s1: + di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>; +def Hexagon_M2_vdmpys_s0: + di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>; +def Hexagon_M2_vdmacs_s1: + di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>; +def Hexagon_M2_vdmacs_s0: + di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>; + +// MTYPE / VH / Vector dual multiply with round and pack. +def Hexagon_M2_vdmpyrs_s0: + si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>; +def Hexagon_M2_vdmpyrs_s1: + si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>; + +// MTYPE / VH / Vector multiply even halfwords. +def Hexagon_M2_vmpy2es_s1: + di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>; +def Hexagon_M2_vmpy2es_s0: + di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>; +def Hexagon_M2_vmac2es: + di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>; +def Hexagon_M2_vmac2es_s1: + di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>; +def Hexagon_M2_vmac2es_s0: + di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>; + +// MTYPE / VH / Vector multiply halfwords. 
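vmpyh multiplies the halfword lanes of Rs by the corresponding lanes of Rt and writes one 32-bit product per lane into the 64-bit destination; the vmac2 forms then accumulate lane-wise. A per-lane C++ sketch under that lane-packing assumption; an illustration only, not the backend's lowering:

  #include <cstdint>
  // Rdd = vmpyh(Rs, Rt): one signed 16x16 product per halfword lane.
  static uint64_t vmpyh(int32_t rs, int32_t rt) {
    int32_t lo = (int32_t)(int16_t)rs * (int16_t)rt;
    int32_t hi = (int32_t)(int16_t)(rs >> 16) * (int16_t)(rt >> 16);
    return ((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo;
  }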
+def Hexagon_M2_vmpy2s_s0: + di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>; +def Hexagon_M2_vmpy2s_s1: + di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>; +def Hexagon_M2_vmac2: + di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>; +def Hexagon_M2_vmac2s_s0: + di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>; +def Hexagon_M2_vmac2s_s1: + di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>; + +// MTYPE / VH / Vector multiply halfwords with round and pack. +def Hexagon_M2_vmpy2s_s0pack: + si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>; +def Hexagon_M2_vmpy2s_s1pack: + si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>; + +// MTYPE / VH / Vector reduce multiply halfwords. +// Rxx32+=vrmpyh(Rss32,Rtt32) +def Hexagon_M2_vrmpy_s0: + di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>; +def Hexagon_M2_vrmac_s0: + di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>; + + +/******************************************************************** +* STYPE/ALU * +*********************************************************************/ + +// STYPE / ALU / Absolute value. +def Hexagon_A2_abs: + si_SInst_si <"abs", int_hexagon_A2_abs>; +def Hexagon_A2_absp: + di_SInst_di <"abs", int_hexagon_A2_absp>; +def Hexagon_A2_abssat: + si_SInst_si_sat <"abs", int_hexagon_A2_abssat>; + +// STYPE / ALU / Negate. +def Hexagon_A2_negp: + di_SInst_di <"neg", int_hexagon_A2_negp>; +def Hexagon_A2_negsat: + si_SInst_si_sat <"neg", int_hexagon_A2_negsat>; + +// STYPE / ALU / Logical Not. +def Hexagon_A2_notp: + di_SInst_di <"not", int_hexagon_A2_notp>; + +// STYPE / ALU / Sign extend word to doubleword. +def Hexagon_A2_sxtw: + di_SInst_si <"sxtw", int_hexagon_A2_sxtw>; + + +/******************************************************************** +* STYPE/BIT * +*********************************************************************/ + +// STYPE / BIT / Count leading. +def Hexagon_S2_cl0: + si_SInst_si <"cl0", int_hexagon_S2_cl0>; +def Hexagon_S2_cl0p: + si_SInst_di <"cl0", int_hexagon_S2_cl0p>; +def Hexagon_S2_cl1: + si_SInst_si <"cl1", int_hexagon_S2_cl1>; +def Hexagon_S2_cl1p: + si_SInst_di <"cl1", int_hexagon_S2_cl1p>; +def Hexagon_S2_clb: + si_SInst_si <"clb", int_hexagon_S2_clb>; +def Hexagon_S2_clbp: + si_SInst_di <"clb", int_hexagon_S2_clbp>; +def Hexagon_S2_clbnorm: + si_SInst_si <"normamt", int_hexagon_S2_clbnorm>; + +// STYPE / BIT / Count trailing. +def Hexagon_S2_ct0: + si_SInst_si <"ct0", int_hexagon_S2_ct0>; +def Hexagon_S2_ct1: + si_SInst_si <"ct1", int_hexagon_S2_ct1>; + +// STYPE / BIT / Compare bit mask. +def HEXAGON_C2_bitsclr: + qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>; +def HEXAGON_C2_bitsclri: + qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>; +def HEXAGON_C2_bitsset: + qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>; + +// STYPE / BIT / Extract unsigned. +// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm]) +def Hexagon_S2_extractu: + si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>; +def Hexagon_S2_extractu_rp: + si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>; +def Hexagon_S2_extractup: + di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>; +def Hexagon_S2_extractup_rp: + di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>; + +// STYPE / BIT / Insert bitfield. 
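insert deposits the low #width bits of Rs into the destination at #offset, leaving the other bits intact; the _rp forms take width and offset from a register pair instead of immediates. A C++ sketch of the immediate form, with a hypothetical helper name; a model of the documented behavior, not the backend code:

  #include <cstdint>
  // Rx = insert(Rs, #width, #offset): a masked bitfield deposit.
  static uint32_t insert_bits(uint32_t rx, uint32_t rs,
                              unsigned width, unsigned offset) {
    uint32_t field = (width < 32) ? ((1u << width) - 1u) : ~0u;
    uint32_t mask  = field << offset;
    return (rx & ~mask) | ((rs << offset) & mask);
  }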
+def HEXAGON_S2_insert:
+  si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
+def HEXAGON_S2_insert_rp:
+  si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
+def HEXAGON_S2_insertp:
+  di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
+def HEXAGON_S2_insertp_rp:
+  di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
+
+// STYPE / BIT / Interleave/deinterleave.
+def HEXAGON_S2_interleave:
+  di_SInst_di <"interleave", int_hexagon_S2_interleave>;
+def HEXAGON_S2_deinterleave:
+  di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
+
+// STYPE / BIT / Linear feedback-shift iteration.
+def HEXAGON_S2_lfsp:
+  di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
+
+// STYPE / BIT / Bit reverse.
+def HEXAGON_S2_brev:
+  si_SInst_si <"brev", int_hexagon_S2_brev>;
+
+// STYPE / BIT / Set/Clear/Toggle Bit.
+def Hexagon_S2_setbit_i:
+  si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
+def Hexagon_S2_togglebit_i:
+  si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
+def Hexagon_S2_clrbit_i:
+  si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
+def Hexagon_S2_setbit_r:
+  si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
+def Hexagon_S2_togglebit_r:
+  si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
+def Hexagon_S2_clrbit_r:
+  si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
+
+// STYPE / BIT / Test Bit.
+def Hexagon_S2_tstbit_i:
+  qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
+def Hexagon_S2_tstbit_r:
+  qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+
+// STYPE / COMPLEX / Vector Complex conjugate.
+def Hexagon_A2_vconj:
+  di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
+
+// STYPE / COMPLEX / Vector Complex rotate.
+def Hexagon_S2_vcrotate:
+  di_SInst_disi <"vcrotate", int_hexagon_S2_vcrotate>;
+
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// STYPE / PERM / Saturate.
+def Hexagon_A2_sat:
+  si_SInst_di <"sat", int_hexagon_A2_sat>;
+def Hexagon_A2_satb:
+  si_SInst_si <"satb", int_hexagon_A2_satb>;
+def Hexagon_A2_sath:
+  si_SInst_si <"sath", int_hexagon_A2_sath>;
+def Hexagon_A2_satub:
+  si_SInst_si <"satub", int_hexagon_A2_satub>;
+def Hexagon_A2_satuh:
+  si_SInst_si <"satuh", int_hexagon_A2_satuh>;
+
+// STYPE / PERM / Swizzle bytes.
+def Hexagon_A2_swiz:
+  si_SInst_si <"swiz", int_hexagon_A2_swiz>;
+
+// STYPE / PERM / Vector align.
+// Needs custom lowering.
+def Hexagon_S2_valignib:
+  di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
+def Hexagon_S2_valignrb:
+  di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
+
+// STYPE / PERM / Vector round and pack.
+def Hexagon_S2_vrndpackwh:
+  si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
+def Hexagon_S2_vrndpackwhs:
+  si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
+
+// STYPE / PERM / Vector saturate and pack.
+def Hexagon_S2_svsathb:
+  si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
+def Hexagon_S2_vsathb:
+  si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
+def Hexagon_S2_svsathub:
+  si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
+def Hexagon_S2_vsathub:
+  si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
+def Hexagon_S2_vsatwh:
+  si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
+def Hexagon_S2_vsatwuh:
+  si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
+
+// STYPE / PERM / Vector saturate without pack.
+def Hexagon_S2_vsathb_nopack: + di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>; +def Hexagon_S2_vsathub_nopack: + di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>; +def Hexagon_S2_vsatwh_nopack: + di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>; +def Hexagon_S2_vsatwuh_nopack: + di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>; + +// STYPE / PERM / Vector shuffle. +def Hexagon_S2_shuffeb: + di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>; +def Hexagon_S2_shuffeh: + di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>; +def Hexagon_S2_shuffob: + di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>; +def Hexagon_S2_shuffoh: + di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>; + +// STYPE / PERM / Vector splat bytes. +def Hexagon_S2_vsplatrb: + si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>; + +// STYPE / PERM / Vector splat halfwords. +def Hexagon_S2_vsplatrh: + di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>; + +// STYPE / PERM / Vector splice. +def HEXAGON_S2_vsplicerb: + di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>; +def HEXAGON_S2_vspliceib: + di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>; + +// STYPE / PERM / Sign extend. +def Hexagon_S2_vsxtbh: + di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>; +def Hexagon_S2_vsxthw: + di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>; + +// STYPE / PERM / Truncate. +def Hexagon_S2_vtrunehb: + si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>; +def Hexagon_S2_vtrunohb: + si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>; +def Hexagon_S2_vtrunewh: + di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>; +def Hexagon_S2_vtrunowh: + di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>; + +// STYPE / PERM / Zero extend. +def Hexagon_S2_vzxtbh: + di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>; +def Hexagon_S2_vzxthw: + di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>; + + +/******************************************************************** +* STYPE/PRED * +*********************************************************************/ + +// STYPE / PRED / Mask generate from predicate. +def Hexagon_C2_mask: + di_SInst_qi <"mask", int_hexagon_C2_mask>; + +// STYPE / PRED / Predicate transfer. +def Hexagon_C2_tfrpr: + si_SInst_qi <"", int_hexagon_C2_tfrpr>; +def Hexagon_C2_tfrrp: + qi_SInst_si <"", int_hexagon_C2_tfrrp>; + +// STYPE / PRED / Viterbi pack even and odd predicate bits. +def Hexagon_C2_vitpack: + si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>; + + +/******************************************************************** +* STYPE/SHIFT * +*********************************************************************/ + +// STYPE / SHIFT / Shift by immediate. +def Hexagon_S2_asl_i_r: + si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>; +def Hexagon_S2_asr_i_r: + si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>; +def Hexagon_S2_lsr_i_r: + si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>; +def Hexagon_S2_asl_i_p: + di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>; +def Hexagon_S2_asr_i_p: + di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>; +def Hexagon_S2_lsr_i_p: + di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>; + +// STYPE / SHIFT / Shift by immediate and accumulate. 
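The acc/nac suffixes in the group below fold the shifted value into the destination with += and -=, which is why these classes constrain "$dst2 = $dst"; the and/or/xacc flavors further down combine with the bitwise operators instead. Two representative variants sketched in C++, with hypothetical helper names; an illustration of the documented semantics, not the backend code:

  #include <cstdint>
  // Rx += asl(Rs, #u5) and Rx ^= lsr(Rs, #u5), modeled on plain integers.
  static inline int32_t asl_i_r_acc(int32_t rx, int32_t rs, unsigned u5) {
    return rx + (int32_t)((uint32_t)rs << u5);
  }
  static inline uint32_t lsr_i_r_xacc(uint32_t rx, uint32_t rs, unsigned u5) {
    return rx ^ (rs >> u5);
  }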
+def Hexagon_S2_asl_i_r_acc: + si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>; +def Hexagon_S2_asr_i_r_acc: + si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>; +def Hexagon_S2_lsr_i_r_acc: + si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>; +def Hexagon_S2_asl_i_r_nac: + si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>; +def Hexagon_S2_asr_i_r_nac: + si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>; +def Hexagon_S2_lsr_i_r_nac: + si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>; +def Hexagon_S2_asl_i_p_acc: + di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>; +def Hexagon_S2_asr_i_p_acc: + di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>; +def Hexagon_S2_lsr_i_p_acc: + di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>; +def Hexagon_S2_asl_i_p_nac: + di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>; +def Hexagon_S2_asr_i_p_nac: + di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>; +def Hexagon_S2_lsr_i_p_nac: + di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>; + +// STYPE / SHIFT / Shift by immediate and add. +def Hexagon_S2_addasl_rrri: + si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>; + +// STYPE / SHIFT / Shift by immediate and logical. +def Hexagon_S2_asl_i_r_and: + si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>; +def Hexagon_S2_asr_i_r_and: + si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>; +def Hexagon_S2_lsr_i_r_and: + si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>; + +def Hexagon_S2_asl_i_r_xacc: + si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>; +def Hexagon_S2_lsr_i_r_xacc: + si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>; + +def Hexagon_S2_asl_i_r_or: + si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>; +def Hexagon_S2_asr_i_r_or: + si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>; +def Hexagon_S2_lsr_i_r_or: + si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>; + +def Hexagon_S2_asl_i_p_and: + di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>; +def Hexagon_S2_asr_i_p_and: + di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>; +def Hexagon_S2_lsr_i_p_and: + di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>; + +def Hexagon_S2_asl_i_p_xacc: + di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>; +def Hexagon_S2_lsr_i_p_xacc: + di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>; + +def Hexagon_S2_asl_i_p_or: + di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>; +def Hexagon_S2_asr_i_p_or: + di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>; +def Hexagon_S2_lsr_i_p_or: + di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>; + +// STYPE / SHIFT / Shift right by immediate with rounding. +def Hexagon_S2_asr_i_r_rnd: + si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>; +def Hexagon_S2_asr_i_r_rnd_goodsyntax: + si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>; + +// STYPE / SHIFT / Shift left by immediate with saturation. +def Hexagon_S2_asl_i_r_sat: + si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>; + +// STYPE / SHIFT / Shift by register. 
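+// Rd32=[asl/asr/lsl/lsr](Rs32,Rt32) +// Rdd64=[asl/asr/lsl/lsr](Rss64,Rt32)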
+def Hexagon_S2_asl_r_r: + si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>; +def Hexagon_S2_asr_r_r: + si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>; +def Hexagon_S2_lsl_r_r: + si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>; +def Hexagon_S2_lsr_r_r: + si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>; +def Hexagon_S2_asl_r_p: + di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>; +def Hexagon_S2_asr_r_p: + di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>; +def Hexagon_S2_lsl_r_p: + di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>; +def Hexagon_S2_lsr_r_p: + di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>; + +// STYPE / SHIFT / Shift by register and accumulate. +def Hexagon_S2_asl_r_r_acc: + si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>; +def Hexagon_S2_asr_r_r_acc: + si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>; +def Hexagon_S2_lsl_r_r_acc: + si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>; +def Hexagon_S2_lsr_r_r_acc: + si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>; +def Hexagon_S2_asl_r_p_acc: + di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>; +def Hexagon_S2_asr_r_p_acc: + di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>; +def Hexagon_S2_lsl_r_p_acc: + di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>; +def Hexagon_S2_lsr_r_p_acc: + di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>; + +def Hexagon_S2_asl_r_r_nac: + si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>; +def Hexagon_S2_asr_r_r_nac: + si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>; +def Hexagon_S2_lsl_r_r_nac: + si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>; +def Hexagon_S2_lsr_r_r_nac: + si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>; +def Hexagon_S2_asl_r_p_nac: + di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>; +def Hexagon_S2_asr_r_p_nac: + di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>; +def Hexagon_S2_lsl_r_p_nac: + di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>; +def Hexagon_S2_lsr_r_p_nac: + di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>; + +// STYPE / SHIFT / Shift by register and logical. 
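+// Rx32[&|]=[asl/asr/lsl/lsr](Rs32,Rt32) +// Rxx64[&|]=[asl/asr/lsl/lsr](Rss64,Rt32)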
+def Hexagon_S2_asl_r_r_and: + si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>; +def Hexagon_S2_asr_r_r_and: + si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>; +def Hexagon_S2_lsl_r_r_and: + si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>; +def Hexagon_S2_lsr_r_r_and: + si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>; + +def Hexagon_S2_asl_r_r_or: + si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>; +def Hexagon_S2_asr_r_r_or: + si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>; +def Hexagon_S2_lsl_r_r_or: + si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>; +def Hexagon_S2_lsr_r_r_or: + si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>; + +def Hexagon_S2_asl_r_p_and: + di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>; +def Hexagon_S2_asr_r_p_and: + di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>; +def Hexagon_S2_lsl_r_p_and: + di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>; +def Hexagon_S2_lsr_r_p_and: + di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>; + +def Hexagon_S2_asl_r_p_or: + di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>; +def Hexagon_S2_asr_r_p_or: + di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>; +def Hexagon_S2_lsl_r_p_or: + di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>; +def Hexagon_S2_lsr_r_p_or: + di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>; + +// STYPE / SHIFT / Shift by register with saturation. +def Hexagon_S2_asl_r_r_sat: + si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>; +def Hexagon_S2_asr_r_r_sat: + si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>; + +// STYPE / SHIFT / Table Index. +def HEXAGON_S2_tableidxb_goodsyntax: + si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>; +def HEXAGON_S2_tableidxd_goodsyntax: + si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>; +def HEXAGON_S2_tableidxh_goodsyntax: + si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>; +def HEXAGON_S2_tableidxw_goodsyntax: + si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>; + + +/******************************************************************** +* STYPE/VH * +*********************************************************************/ + +// STYPE / VH / Vector absolute value halfwords. +// Rdd64=vabsh(Rss64) +def Hexagon_A2_vabsh: + di_SInst_di <"vabsh", int_hexagon_A2_vabsh>; +def Hexagon_A2_vabshsat: + di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>; + +// STYPE / VH / Vector shift halfwords by immediate. +// Rdd64=v[asl/asr/lsr]h(Rss64,#u4) +def Hexagon_S2_asl_i_vh: + di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>; +def Hexagon_S2_asr_i_vh: + di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>; +def Hexagon_S2_lsr_i_vh: + di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>; + +// STYPE / VH / Vector shift halfwords by register. +// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32) +def Hexagon_S2_asl_r_vh: + di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>; +def Hexagon_S2_asr_r_vh: + di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>; +def Hexagon_S2_lsl_r_vh: + di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>; +def Hexagon_S2_lsr_r_vh: + di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>; + + +/******************************************************************** +* STYPE/VW * +*********************************************************************/ + +// STYPE / VW / Vector absolute value words.
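+// Rdd64=vabsw(Rss64)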
+def Hexagon_A2_vabsw: + di_SInst_di <"vabsw", int_hexagon_A2_vabsw>; +def Hexagon_A2_vabswsat: + di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>; + +// STYPE / VW / Vector shift words by immediate. +// Rdd64=v[asl/asr/lsr]w(Rss64,#u5) +def Hexagon_S2_asl_i_vw: + di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>; +def Hexagon_S2_asr_i_vw: + di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>; +def Hexagon_S2_lsr_i_vw: + di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>; + +// STYPE / VW / Vector shift words by register. +// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32) +def Hexagon_S2_asl_r_vw: + di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>; +def Hexagon_S2_asr_r_vw: + di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>; +def Hexagon_S2_lsl_r_vw: + di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>; +def Hexagon_S2_lsr_r_vw: + di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>; + +// STYPE / VW / Vector shift words with truncate and pack. +def Hexagon_S2_asr_r_svw_trun: + si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>; +def Hexagon_S2_asr_i_svw_trun: + si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>; + +include "HexagonIntrinsicsV3.td" +include "HexagonIntrinsicsV4.td" diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td new file mode 100644 index 0000000..68eaf68 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td @@ -0,0 +1,29 @@ +//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Multiply 64-bit and use lower result +// +// Optimized with intrinsic accumulates (maci) +// +def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), + (COMBINE_rr + (Hexagon_M2_maci + (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)), + subreg_hireg), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)), + (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg), + (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)), + subreg_loreg))>; + + + diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td new file mode 100644 index 0000000..2a54e62 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td @@ -0,0 +1,50 @@ +//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format. +// +//===----------------------------------------------------------------------===// + + + +
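+// Note on the HexagonIntrinsicsDerived.td pattern above: it assembles the low +// 64 bits of a 64x64 multiply from 32-bit pieces. The unsigned product of the +// two low words supplies the low word plus a carry into the high word, each +// cross product is folded in with a multiply-accumulate (maci), and the hi*hi +// term is dropped because it only affects bits 64 and up. A minimal C++ sketch +// of that identity (mul64_low and the variable names are illustrative, not +// part of the port): +// +// #include <cstdint> +// uint64_t mul64_low(uint64_t a, uint64_t b) { +// uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32); +// uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32); +// uint64_t lo_prod = (uint64_t)a_lo * b_lo; // MPYU64 +// uint32_t hi = (uint32_t)(lo_prod >> 32); // EXTRACT_SUBREG, subreg_hireg +// hi += a_lo * b_hi; // inner Hexagon_M2_maci +// hi += a_hi * b_lo; // outer Hexagon_M2_maci +// return ((uint64_t)hi << 32) | (uint32_t)lo_prod; // COMBINE_rr +// }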
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary. +def Hexagon_M2_vrcmpys_s1: + di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>; +def Hexagon_M2_vrcmpys_acc_s1: + di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>; +def Hexagon_M2_vrcmpys_s1rp: + si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>; + + + + +/******************************************************************** +* MTYPE/VH * +*********************************************************************/ + +// MTYPE / VH / Vector reduce add unsigned halfwords. +def Hexagon_M2_vradduh: + si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>; + + +/******************************************************************** +* ALU64/ALU * +*********************************************************************/ + +// ALU64 / ALU / Add. +def Hexagon_A2_addsp: + di_ALU64_sidi <"add", int_hexagon_A2_addsp>; +def Hexagon_A2_addpsat: + di_ALU64_didi <"add", int_hexagon_A2_addpsat>; + +def Hexagon_A2_maxp: + di_ALU64_didi <"max", int_hexagon_A2_maxp>; +def Hexagon_A2_maxup: + di_ALU64_didi <"maxu", int_hexagon_A2_maxup>; diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td new file mode 100644 index 0000000..dd28ebb --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td @@ -0,0 +1,369 @@ +//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is populated based on the following specs: +// Hexagon V4 Architecture Extensions +// Application-Level Specification +// 80-V9418-12 Rev. A +// June 15, 2010 + + +// +// ALU 32 types.
+// + +class si_ALU32_sisi_not<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class di_ALU32_s8si<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")), + [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>; + +class di_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_neg_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_neg_ALU32_sis10<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class qi_neg_ALU32_siu9<string opc, Intrinsic IntID> + : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_neg_ALU32_sisi<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class si_neg_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + +class si_ALU32_sis8<string opc, Intrinsic IntID> + : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + + +// +// SInst Classes. 
+// +class qi_neg_SInst_qiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2), + !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>; + +class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, !$src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, $src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, or($src2, !$src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID> + : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, or($src2, $src3))")), + [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_si_addsis6<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, add($src2, #$src3))")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_SInst_si_subs6si<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, sub(#$src2, $src3))")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, + IntRegs:$src3))]>; + +class di_ALU64_didi_neg<string opc, Intrinsic IntID> + : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>; + +class di_MInst_dididi_xacc<string opc, Intrinsic IntID> + : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2), + !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")), + [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1, + DoubleRegs:$src2))], + "$dst2 = $dst">; + +class si_MInst_sisisi_and<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_andn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_sisis10_andi<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3), + !strconcat("$dst = ", !strconcat(opc , + "($src1, and($src2, #$src3))")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2, + imm:$src3))]>; + +class si_MInst_sisisi_xor<string opc, Intrinsic IntID> + :
MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_xorn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_sisis10_or<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + imm:$src3))]>; + +class si_MInst_sisisi_or<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_MInst_sisisi_orn<string opc, Intrinsic IntID> + : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3), + !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")), + [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2, + IntRegs:$src3))]>; + +class si_SInst_siu5_sat<string opc, Intrinsic IntID> + : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2), + !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")), + [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>; + + +/******************************************************************** +* ALU32/ALU * +*********************************************************************/ + +// ALU32 / ALU / Logical Operations. +def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>; +def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>; + + +/******************************************************************** +* ALU32/PERM * +*********************************************************************/ + +// ALU32 / PERM / Combine Words Into Doublewords. +def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>; +def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>; + + +/******************************************************************** +* ALU32/PRED * +*********************************************************************/ + +// ALU32 / PRED / Conditional Shift Halfword. +// ALU32 / PRED / Conditional Sign Extend. +// ALU32 / PRED / Conditional Zero Extend. +// ALU32 / PRED / Compare. +def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>; +def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>; +def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>; +def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>; +def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>; +def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>; + +// ALU32 / PRED / Compare To General Register.
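+// Rd32=[!]cmp.eq(Rs32,{Rt32,#s8})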
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>; +def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>; +def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>; +def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>; + + +/******************************************************************** +* CR * +*********************************************************************/ + +// CR / Corner Detection Acceleration. +def Hexagon_C4_fastcorner9: + qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>; +def Hexagon_C4_fastcorner9_not: + qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>; + +// CR / Logical Operations On Predicates. +def Hexagon_C4_and_andn: + qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>; +def Hexagon_C4_and_and: + qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>; +def Hexagon_C4_and_orn: + qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>; +def Hexagon_C4_and_or: + qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>; +def Hexagon_C4_or_andn: + qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>; +def Hexagon_C4_or_and: + qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>; +def Hexagon_C4_or_orn: + qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>; +def Hexagon_C4_or_or: + qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>; + + +/******************************************************************** +* XTYPE/ALU * +*********************************************************************/ + +// XTYPE / ALU / Add And Accumulate. +def Hexagon_S4_addaddi: + si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>; +def Hexagon_S4_subaddi: + si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>; + +// XTYPE / ALU / Logical Doublewords. +def Hexagon_S4_andnp: + di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>; +def Hexagon_S4_ornp: + di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>; + +// XTYPE / ALU / Logical-logical Doublewords. +def Hexagon_M4_xor_xacc: + di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>; + +// XTYPE / ALU / Logical-logical Words. +def HEXAGON_M4_and_and: + si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>; +def HEXAGON_M4_and_or: + si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>; +def HEXAGON_M4_and_xor: + si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>; +def HEXAGON_M4_and_andn: + si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>; +def HEXAGON_M4_xor_and: + si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>; +def HEXAGON_M4_xor_or: + si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>; +def HEXAGON_M4_xor_andn: + si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>; +def HEXAGON_M4_or_and: + si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>; +def HEXAGON_M4_or_or: + si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>; +def HEXAGON_M4_or_xor: + si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>; +def HEXAGON_M4_or_andn: + si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>; +def HEXAGON_S4_or_andix: + si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>; +def HEXAGON_S4_or_andi: + si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>; +def HEXAGON_S4_or_ori: + si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>; + +// XTYPE / ALU / Modulo wrap. +def HEXAGON_A4_modwrapu: + si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>; + +// XTYPE / ALU / Round. 
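+// One plausible reading of round(Rs,#u5), following the usual +// add-half-then-shift convention (verify against the V4 spec cited in the +// file header; round_ri is an illustrative name, not from the port): +// +// int32_t round_ri(int32_t rs, unsigned u5) { +// if (u5 == 0) return rs; // nothing is discarded +// int64_t biased = (int64_t)rs + (1LL << (u5 - 1)); +// return (int32_t)(biased >> u5); // arithmetic shift right +// } +// +// cround differs by rounding half-way cases to even ("convergent" rounding).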
+def HEXAGON_A4_cround_ri: + si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>; +def HEXAGON_A4_cround_rr: + si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>; +def HEXAGON_A4_round_ri: + si_SInst_siu5 <"round", int_hexagon_A4_round_ri>; +def HEXAGON_A4_round_rr: + si_SInst_sisi <"round", int_hexagon_A4_round_rr>; +def HEXAGON_A4_round_ri_sat: + si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>; +def HEXAGON_A4_round_rr_sat: + si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>; + +// XTYPE / ALU / Vector reduce add unsigned halfwords. +// XTYPE / ALU / Vector add bytes. +// XTYPE / ALU / Vector conditional negate. +// XTYPE / ALU / Vector maximum bytes. +// XTYPE / ALU / Vector reduce maximum halfwords. +// XTYPE / ALU / Vector reduce maximum words. +// XTYPE / ALU / Vector minimum bytes. +// XTYPE / ALU / Vector reduce minimum halfwords. +// XTYPE / ALU / Vector reduce minimum words. +// XTYPE / ALU / Vector subtract bytes. + + +/******************************************************************** +* XTYPE/BIT * +*********************************************************************/ + +// XTYPE / BIT / Count leading. +// XTYPE / BIT / Count trailing. +// XTYPE / BIT / Extract bitfield. +// XTYPE / BIT / Masked parity. +// XTYPE / BIT / Bit reverse. +// XTYPE / BIT / Split bitfield. + + +/******************************************************************** +* XTYPE/COMPLEX * +*********************************************************************/ + +// XTYPE / COMPLEX / Complex add/sub halfwords. +// XTYPE / COMPLEX / Complex add/sub words. +// XTYPE / COMPLEX / Complex multiply 32x16. +// XTYPE / COMPLEX / Vector reduce complex rotate. + + +/******************************************************************** +* XTYPE/MPY * +*********************************************************************/ + diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h new file mode 100644 index 0000000..0318c51 --- /dev/null +++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -0,0 +1,75 @@ +//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonMACHINEFUNCTIONINFO_H +#define HexagonMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + + namespace Hexagon { + const unsigned int StartPacket = 0x1; + const unsigned int EndPacket = 0x2; + } + + +/// Hexagon target-specific information for each MachineFunction. +class HexagonMachineFunctionInfo : public MachineFunctionInfo { + // SRetReturnReg - Some subtargets require that sret lowering includes + // returning the value of the returned struct in a register. This field + // holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg; + std::vector<MachineInstr*> AllocaAdjustInsts; + int VarArgsFrameIndex; + bool HasClobberLR; + + std::map<const MachineInstr*, unsigned> PacketInfo; + + +public: + HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {} + + HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), + HasClobberLR(0) {} + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void addAllocaAdjustInst(MachineInstr* MI) { + AllocaAdjustInsts.push_back(MI); + } + const std::vector<MachineInstr*>& getAllocaAdjustInsts() { + return AllocaAdjustInsts; + } + + void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; } + int getVarArgsFrameIndex() { return VarArgsFrameIndex; } + + void setStartPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::StartPacket; + } + void setEndPacket(MachineInstr* MI) { + PacketInfo[MI] |= Hexagon::EndPacket; + } + bool isStartPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::StartPacket)); + } + bool isEndPacket(const MachineInstr* MI) const { + return (PacketInfo.count(MI) && + (PacketInfo.find(MI)->second & Hexagon::EndPacket)); + } + void setHasClobberLR(bool v) { HasClobberLR = v; } + bool hasClobberLR() const { return HasClobberLR; } + +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp new file mode 100644 index 0000000..1229aca --- /dev/null +++ b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp @@ -0,0 +1,129 @@ +//===-- HexagonOptimizeSZExtends.cpp - Identify and remove sign and -------===// +//===-- zero extends. -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/PassSupport.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Debug.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +#include "Hexagon.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace { + struct HexagonOptimizeSZExtends : public MachineFunctionPass { + + public: + static char ID; + HexagonOptimizeSZExtends() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "Hexagon remove redundant zero and sign extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + }; +} + +char HexagonOptimizeSZExtends::ID = 0; + +// This is a brain dead pass to get rid of redundant sign extends for the +// following case: +// +// Transform the following pattern +// %vreg170<def> = SXTW %vreg166 +// ...
+// %vreg176<def> = COPY %vreg170:subreg_loreg + +// Into +// %vreg176<def> = COPY %vreg166 + +bool HexagonOptimizeSZExtends::runOnMachineFunction(MachineFunction &MF) { + DenseMap<unsigned, unsigned> SExtMap; + + // Loop over all of the basic blocks + for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); + MBBb != MBBe; ++MBBb) { + MachineBasicBlock* MBB = MBBb; + SExtMap.clear(); + + // Traverse the basic block. + for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); + ++MII) { + MachineInstr *MI = MII; + // Look for sign extends: + // %vreg170<def> = SXTW %vreg166 + if (MI->getOpcode() == Hexagon::SXTW) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + // Just handle virtual registers. + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Map the following: + // %vreg170<def> = SXTW %vreg166 + // SExtMap[%vreg170] = %vreg166 + SExtMap[DstReg] = SrcReg; + } + } + // Look for copy: + // %vreg176<def> = COPY %vreg170:subreg_loreg + if (MI->isCopy()) { + assert (MI->getNumOperands() == 2); + MachineOperand &Dst = MI->getOperand(0); + MachineOperand &Src = MI->getOperand(1); + + // Make sure we are copying the lower 32 bits. + if (Src.getSubReg() != Hexagon::subreg_loreg) + continue; + + unsigned DstReg = Dst.getReg(); + unsigned SrcReg = Src.getReg(); + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Try to find in the map. + if (unsigned SextSrc = SExtMap.lookup(SrcReg)) { + // Change the 1st operand. + MI->RemoveOperand(1); + MI->addOperand(MachineOperand::CreateReg(SextSrc, false)); + } + } + } + } + } + return true; +} + +FunctionPass *llvm::createHexagonOptimizeSZExtends() { + return new HexagonOptimizeSZExtends(); +} diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp new file mode 100644 index 0000000..521e0c1 --- /dev/null +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -0,0 +1,323 @@ +//==- HexagonRegisterInfo.cpp - Hexagon Register Information -----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class.
+// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <iostream> + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Function.h" +using namespace llvm; + + +HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st, + const HexagonInstrInfo &tii) + : HexagonGenRegisterInfo(Hexagon::R31), + Subtarget(st), + TII(tii) { +} + +const unsigned* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction + *MF) + const { + static const unsigned CalleeSavedRegsV2[] = { + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + static const unsigned CalleeSavedRegsV3[] = { + Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, + Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 + }; + + switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V2: + return CalleeSavedRegsV2; + break; + case HexagonSubtarget::V3: + case HexagonSubtarget::V4: + return CalleeSavedRegsV3; + break; + default: + const char *ErrorString = + "Callee saved registers requested for unknown architecture version"; + llvm_unreachable(ErrorString); + } +} + +BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) + const { + BitVector Reserved(getNumRegs()); + Reserved.set(HEXAGON_RESERVED_REG_1); + Reserved.set(HEXAGON_RESERVED_REG_2); + Reserved.set(Hexagon::R29); + Reserved.set(Hexagon::R30); + Reserved.set(Hexagon::R31); + Reserved.set(Hexagon::D14); + Reserved.set(Hexagon::D15); + Reserved.set(Hexagon::LC0); + Reserved.set(Hexagon::LC1); + Reserved.set(Hexagon::SA0); + Reserved.set(Hexagon::SA1); + return Reserved; +} + + +const TargetRegisterClass* const* +HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = { + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + }; + static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = { + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass, + }; + + switch(Subtarget.getHexagonArchVersion()) { + case HexagonSubtarget::V2: + return CalleeSavedRegClassesV2; + break; + case HexagonSubtarget::V3: + case HexagonSubtarget::V4: + return CalleeSavedRegClassesV3; + break; + default: + const char *ErrorString = + "Callee saved register classes requested for unknown architecture version"; + llvm_unreachable(ErrorString); + } +} + +void HexagonRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + MachineInstr &MI = *I; + + if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) { + // Hexagon_TODO: add code + } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) { + // Hexagon_TODO: add code + } else { + assert(0 && "Cannot handle this call frame pseudo instruction"); + } + MBB.erase(I); +} + +void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const { + + // + // Hexagon_TODO: Do we need to enforce this for Hexagon? + assert(SPAdj == 0 && "Unexpected"); + + + unsigned i = 0; + MachineInstr &MI = *II; + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(i).getIndex(); + + // Addressable stack objects are accessed using neg. offsets from %fp. + MachineFunction &MF = *MI.getParent()->getParent(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + unsigned FrameReg = getFrameRegister(MF); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (!TFI->hasFP(MF)) { + // We will not reserve space on the stack for the lr and fp registers. + Offset -= 2 * Hexagon_WordSize; + } + + const unsigned FrameSize = MFI.getStackSize(); + + if (!MFI.hasVarSizedObjects() && + TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) && + !TII.isSpillPredRegOp(&MI)) { + // Replace frame index with a stack pointer reference. + MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true); + MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + } else { + // Replace frame index with a frame pointer reference. + if (!TII.isValidOffset(MI.getOpcode(), Offset)) { + + // If the offset overflows, then correct it. + // + // For loads, we do not need a reserved register; change + // r0 = memw(r30 + #10000) to: + // + // r0 = add(r30, #10000) + // r0 = memw(r0) + if ( (MI.getOpcode() == Hexagon::LDriw) || + (MI.getOpcode() == Hexagon::LDrid) || + (MI.getOpcode() == Hexagon::LDrih) || + (MI.getOpcode() == Hexagon::LDriuh) || + (MI.getOpcode() == Hexagon::LDrib) || + (MI.getOpcode() == Hexagon::LDriub) ) { + unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ? + *getSubRegisters(MI.getOperand(0).getReg()) : + MI.getOperand(0).getReg(); + + // Check if offset can fit in addi. + if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + dstReg).addReg(FrameReg).addReg(dstReg); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), + dstReg).addReg(FrameReg).addImm(Offset); + } + + MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } else if ((MI.getOpcode() == Hexagon::STriw) || + (MI.getOpcode() == Hexagon::STrid) || + (MI.getOpcode() == Hexagon::STrih) || + (MI.getOpcode() == Hexagon::STrib) || + (MI.getOpcode() == Hexagon::STriwt)) { + // For stores, we need a reserved register. Change + // memw(r30 + #10000) = r0 to: + // + // rs = add(r30, #10000); + // memw(rs) = r0 + unsigned resReg = HEXAGON_RESERVED_REG_1; + + // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + resReg).addReg(FrameReg).addReg(resReg); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), + resReg).addReg(FrameReg).addImm(Offset); + } + MI.getOperand(i).ChangeToRegister(resReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } else if (TII.isMemOp(&MI)) { + unsigned resReg = HEXAGON_RESERVED_REG_1; + if (!MFI.hasVarSizedObjects() && + TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { + MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, + true); + MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset); + } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + resReg).addReg(FrameReg).addReg(resReg); + MI.getOperand(i).ChangeToRegister(resReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), + resReg).addReg(FrameReg).addImm(Offset); + MI.getOperand(i).ChangeToRegister(resReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } + } else { + unsigned dstReg = MI.getOperand(0).getReg(); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), + dstReg).addReg(FrameReg).addReg(dstReg); + // Can we delete MI??? r2 = add (r2, #0). + MI.getOperand(i).ChangeToRegister(dstReg, false, false, true); + MI.getOperand(i+1).ChangeToImmediate(0); + } + } else { + // If the offset is small enough to fit in the immediate field, directly + // encode it. + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset); + } + } + +} + +unsigned HexagonRegisterInfo::getRARegister() const { + return Hexagon::R31; +} + +unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction + &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + if (TFI->hasFP(MF)) { + return Hexagon::R30; + } + + return Hexagon::R29; +} + +unsigned HexagonRegisterInfo::getFrameRegister() const { + return Hexagon::R30; +} + +unsigned HexagonRegisterInfo::getStackRegister() const { + return Hexagon::R29; +} + +void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove> + &Moves) const +{ + // VirtualFP = (R30 + #0). 
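+ // That is, debug info defines the virtual frame pointer as R30 plus a
+ // zero offset.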
+ unsigned FPReg = getFrameRegister(); + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(FPReg, 0); + Moves.push_back(MachineMove(0, Dst, Src)); +} + +unsigned HexagonRegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned HexagonRegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +#define GET_REGINFO_TARGET_DESC +#include "HexagonGenRegisterInfo.inc" diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h new file mode 100644 index 0000000..33b0c14 --- /dev/null +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -0,0 +1,89 @@ +//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Hexagon implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef HexagonREGISTERINFO_H +#define HexagonREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#define GET_REGINFO_HEADER +#include "HexagonGenRegisterInfo.inc" +#include "llvm/MC/MachineLocation.h" + +// +// We try not to hard code the reserved registers in our code, +// so the following two macros were defined. However, there +// are still a few places where R11 and R10 are hard wired. +// See below. If, in the future, we decide to change the reserved +// registers, don't forget to change the following places: +// +// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td +// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td +// 3. the definition of "IntRegs" in HexagonRegisterInfo.td +// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td +// +#define HEXAGON_RESERVED_REG_1 Hexagon::R10 +#define HEXAGON_RESERVED_REG_2 Hexagon::R11 + +namespace llvm { + +class HexagonSubtarget; +class HexagonInstrInfo; +class Type; + +struct HexagonRegisterInfo : public HexagonGenRegisterInfo { + HexagonSubtarget &Subtarget; + const HexagonInstrInfo &TII; + + HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii); + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + /// determineFrameLayout - Determine the size of the frame and maximum call + /// frame size. + void determineFrameLayout(MachineFunction &MF) const; + + /// requiresRegisterScavenging - returns true since we may need scavenging for + /// a temporary register when generating hardware loop instructions. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } + + // Debug information queries.
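+ // These feed DWARF frame and debug information emission.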
+ unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister() const; + void getInitialFrameState(std::vector<MachineMove> &Moves) const; + unsigned getStackRegister() const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td new file mode 100644 index 0000000..c05f844 --- /dev/null +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -0,0 +1,169 @@ +//===- HexagonRegisterInfo.td - Hexagon Register defs ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the Hexagon register file. +//===----------------------------------------------------------------------===// + +class HexagonReg<string n> : Register<n> { + field bits<5> Num; + let Namespace = "Hexagon"; +} + +class HexagonDoubleReg<string n, list<Register> subregs> : + RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + let Namespace = "Hexagon"; +} + +// Registers are identified with 5-bit ID numbers. +// Ri - 32-bit integer registers. +class Ri<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + +// Rf - 32-bit floating-point registers. +class Rf<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + + +// Rd - 64 bit registers. +class Rd<bits<5> num, string n, list<Register> subregs> : +HexagonDoubleReg<n, subregs> { + let Num = num; + let SubRegs = subregs; +} + + +class Rp<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + +class Rc<bits<5> num, string n> : HexagonReg<n> { + let Num = num; +} + +let Namespace = "Hexagon" in { + + def subreg_loreg : SubRegIndex; + def subreg_hireg : SubRegIndex; + + // Integer registers. 
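+ // By convention in this port, R29 is the stack pointer, R30 the frame
+ // pointer, and R31 the link register.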
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>; + def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>; + def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>; + def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>; + def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>; + def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>; + def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>; + def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>; + def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>; + def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>; + def R10 : Ri<10, "r10">, DwarfRegNum<[10]>; + def R11 : Ri<11, "r11">, DwarfRegNum<[11]>; + def R12 : Ri<12, "r12">, DwarfRegNum<[12]>; + def R13 : Ri<13, "r13">, DwarfRegNum<[13]>; + def R14 : Ri<14, "r14">, DwarfRegNum<[14]>; + def R15 : Ri<15, "r15">, DwarfRegNum<[15]>; + def R16 : Ri<16, "r16">, DwarfRegNum<[16]>; + def R17 : Ri<17, "r17">, DwarfRegNum<[17]>; + def R18 : Ri<18, "r18">, DwarfRegNum<[18]>; + def R19 : Ri<19, "r19">, DwarfRegNum<[19]>; + def R20 : Ri<20, "r20">, DwarfRegNum<[20]>; + def R21 : Ri<21, "r21">, DwarfRegNum<[21]>; + def R22 : Ri<22, "r22">, DwarfRegNum<[22]>; + def R23 : Ri<23, "r23">, DwarfRegNum<[23]>; + def R24 : Ri<24, "r24">, DwarfRegNum<[24]>; + def R25 : Ri<25, "r25">, DwarfRegNum<[25]>; + def R26 : Ri<26, "r26">, DwarfRegNum<[26]>; + def R27 : Ri<27, "r27">, DwarfRegNum<[27]>; + def R28 : Ri<28, "r28">, DwarfRegNum<[28]>; + def R29 : Ri<29, "r29">, DwarfRegNum<[29]>; + def R30 : Ri<30, "r30">, DwarfRegNum<[30]>; + def R31 : Ri<31, "r31">, DwarfRegNum<[31]>; + + + def PC : Ri<31, "r31">, DwarfRegNum<[32]>; + def GP : Ri<31, "r31">, DwarfRegNum<[33]>; + + // Aliases of the R* registers used to hold 64-bit int values (doubles). + let SubRegIndices = [subreg_loreg, subreg_hireg] in { + def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>; + def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>; + def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>; + def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>; + def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>; + def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>; + def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>; + def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>; + def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>; + def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>; + def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>; + def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>; + def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>; + def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>; + def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>; + def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>; + } + + // Predicate registers. + def P0 : Rp< 0, "p0">, DwarfRegNum<[63]>; + def P1 : Rp< 0, "p1">, DwarfRegNum<[64]>; + def P2 : Rp< 0, "p2">, DwarfRegNum<[65]>; + def P3 : Rp< 0, "p3">, DwarfRegNum<[66]>; + + // Control registers. + def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>; + def LC0 : Rc<0, "lc0">, DwarfRegNum<[68]>; + + def SA1 : Rc<0, "sa1">, DwarfRegNum<[69]>; + def LC1 : Rc<0, "lc1">, DwarfRegNum<[70]>; +} + + + + + + + + + + +// Register classes. +// +// FIXME: the register order should be defined in terms of the preferred +// allocation order... 
+// +def IntRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "R%u", 0, 9), + (sequence "R%u", 12, 28), + R10, R11, R29, R30, + R31)> { +} + + + +def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, (add (sequence "D%u", 0, + 4), + (sequence "D%u", 6, 13), + D5, D14, D15)> { + let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)]; +} + + +def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))> +{ + let Size = 32; +} + +def CRRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "LC%u", 0, 1), + (sequence "SA%u", 0, 1), + PC)> { + let Size = 32; +} diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp new file mode 100644 index 0000000..3ca257f --- /dev/null +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -0,0 +1,85 @@ +//=- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. These parameters +// are already sign extended by the caller per Hexagon's ABI. +// +//===----------------------------------------------------------------------===// + + + +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "HexagonTargetMachine.h" +#include <iostream> + +using namespace llvm; +namespace { + struct HexagonRemoveExtendArgs : public FunctionPass { + public: + static char ID; + HexagonRemoveExtendArgs() : FunctionPass(ID) {} + virtual bool runOnFunction(Function &F); + + const char *getPassName() const { + return "Remove sign extends"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + } + }; +} + +char HexagonRemoveExtendArgs::ID = 0; +RegisterPass<HexagonRemoveExtendArgs> X("reargs", + "Remove Sign and Zero Extends for Args" + ); + + + +bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { + unsigned Idx = 1; + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; + ++AI, ++Idx) { + if (F.paramHasAttr(Idx, Attribute::SExt)) { + Argument* Arg = AI; + if (!isa<PointerType>(Arg->getType())) { + for (Instruction::use_iterator UI = Arg->use_begin(); + UI != Arg->use_end();) { + if (isa<SExtInst>(*UI)) { + Instruction* Use = cast<Instruction>(*UI); + SExtInst* SI = new SExtInst(Arg, Use->getType()); + assert (EVT::getEVT(SI->getType()) == + (EVT::getEVT(Use->getType()))); + ++UI; + Use->replaceAllUsesWith(SI); + Instruction* First = F.getEntryBlock().begin(); + SI->insertBefore(First); + Use->eraseFromParent(); + } else { + ++UI; + } + } + } + } + } + return true; +} + + + +FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) { + return new HexagonRemoveExtendArgs(); +} diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td new file mode 100644 index 0000000..427d1cb --- /dev/null +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -0,0 +1,53 @@ +//===-HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois
Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Functional Units +def LUNIT : FuncUnit; +def LSUNIT : FuncUnit; +def MUNIT : FuncUnit; +def SUNIT : FuncUnit; + + +// Itinerary classes +def ALU32 : InstrItinClass; +def ALU64 : InstrItinClass; +def CR : InstrItinClass; +def J : InstrItinClass; +def JR : InstrItinClass; +def LD : InstrItinClass; +def M : InstrItinClass; +def ST : InstrItinClass; +def S : InstrItinClass; +def PSEUDO : InstrItinClass; + + +def HexagonItineraries : + ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [ + InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>, + InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>, + InstrItinData<CR , [InstrStage<1, [SUNIT]>]>, + InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>, + InstrItinData<JR , [InstrStage<1, [MUNIT]>]>, + InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>, + InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>, + InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>, + InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>, + InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]> +]>; + + +//===----------------------------------------------------------------------===// +// V4 Machine Info + +//===----------------------------------------------------------------------===// + +include "HexagonScheduleV4.td" + +//===----------------------------------------------------------------------===// +// V4 Machine Info - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td new file mode 100644 index 0000000..4cf66fe --- /dev/null +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -0,0 +1,56 @@ +//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine. +// This file describes that machine information. + +// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| + + +// Functional Units. +def SLOT0 : FuncUnit; +def SLOT1 : FuncUnit; +def SLOT2 : FuncUnit; +def SLOT3 : FuncUnit; + +// Itinerary classes. +def NV_V4 : InstrItinClass; +def MEM_V4 : InstrItinClass; +// ALU64/M/S Instruction classes of V2 are collectively knownn as XTYPE in V4. 
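A compact way to read the slot table above together with the HexagonItinerariesV4 definition that follows: each itinerary class is simply the set of slots its instructions may issue in. A self-contained C++ sketch (the enum and function names are invented for illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    enum ItinClassV4 { LD_C, ST_C, ALU32_C, NV_C, MEM_C, J_C, JR_C, CR_C, XTYPE_C };

    // Bit i set means the class may issue in SLOTi, per the table above.
    static uint8_t allowedSlots(ItinClassV4 C) {
      switch (C) {
      case LD_C: case ST_C:  return 0x3; // SLOT0, SLOT1
      case NV_C: case MEM_C: return 0x1; // SLOT0 only
      case ALU32_C:          return 0xF; // any slot
      case J_C:              return 0xC; // SLOT2, SLOT3
      case JR_C:             return 0x4; // SLOT2 only
      case CR_C:             return 0x8; // SLOT3 only
      case XTYPE_C:          return 0xC; // ALU64/M/S on SLOT2, SLOT3
      }
      return 0;
    }

    int main() { assert(allowedSlots(JR_C) == 0x4); return 0; }

A packet is then legal only if its instructions can be matched to distinct slots within these masks.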
+
+def HexagonItinerariesV4 : ProcessorItineraries<
+      [SLOT0, SLOT1, SLOT2, SLOT3], [], [
+      InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+      InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+      InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+      InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+      InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+      InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+      InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+      InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+      InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+      InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+      InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+      InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 0000000..f21d928
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//=- HexagonSelectCCInfo.td - Selectcc mappings --------------*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// selectcc mappings.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETEQ)),
+           (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETNE)),
+           (i32 (MUX_rr (i1 (NOT_Ps (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETGT)),
+           (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETUGT)),
+           (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETULT)),
+           (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs,
+                                              (ADD_ri IntRegs:$rhs, -1)))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETLT)),
+           (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs,
+                                             (ADD_ri IntRegs:$rhs, -1)))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETLE)),
+           (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETULE)),
+           (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+//
+// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETGE)),
+           (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETUGE)),
+           (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+//   pt = not(p0 xor p1)
+//   Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE.
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETNE)),
+           (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+                        IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+                          IntRegs:$fval, SETEQ)),
+           (i32 (MUX_rr (i1 (NOT_pp (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+                        IntRegs:$tval, IntRegs:$fval))>;
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64, so use this instead:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+//   combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+//           mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+                         DoubleRegs:$fval, SETGT)),
+          (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+                              (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+                              (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+                      (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+                              (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+                              (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+// setlt-64 -> setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+                         DoubleRegs:$fval, SETLT)),
+          (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+                                         (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+                              (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+                              (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+                      (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+                                         (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+                              (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+                              (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a52c604
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,46 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+#include "HexagonTargetMachine.h"
+using namespace llvm;
+
+bool llvm::flag_aligned_memcpy;
+
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
+                                                 &TM)
+  : TargetSelectionDAGInfo(TM) {
+}
+
+HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
+}
+
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
+                        SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+                        bool isVolatile, bool AlwaysInline,
+                        MachinePointerInfo DstPtrInfo,
+                        MachinePointerInfo SrcPtrInfo) const {
+  // Flag copies that qualify for the aligned-memcpy path: at least word
+  // (4-byte) aligned, larger than 32 bytes, and a multiple of eight bytes.
+  flag_aligned_memcpy = false;
+  if ((Align & 0x3) == 0) {
+    ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+    if (ConstantSize) {
+      uint64_t SizeVal = ConstantSize->getZExtValue();
+      if ((SizeVal > 32) && ((SizeVal % 8) == 0))
+        flag_aligned_memcpy = true;
+    }
+  }
+
+  return SDValue();
+}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 0000000..86fa026
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,40 @@
+//=-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ------*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+  explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+  ~HexagonSelectionDAGInfo();
+
+  virtual
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                  SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool isVolatile, bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 0000000..f4d3647
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,136 @@
+//===---- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like: flag = (a == b) ? 1 : 3;
+// would be:
+//
+//   {p0 = cmp.eq(r0,r1)}
+//   {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+//   {p0 = cmp.eq(r0,r1)
+//    if (p0.new) r3 = #1
+//    if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We optimistically assume that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
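Before the implementation below, the rewrite itself in miniature: TFR_condset_rr selects between two registers under a predicate, and the pass splits it into at most two predicated transfers, skipping any copy whose source equals its destination. A small self-contained C++ sketch (register numbers invented; not part of the patch):

    #include <cstdio>

    // Models "r<Dest> = TFR_condset_rr(p<Pred>, r<SrcTrue>, r<SrcFalse>)".
    static void split(unsigned Dest, unsigned Pred,
                      unsigned SrcTrue, unsigned SrcFalse) {
      if (Dest != SrcTrue)  // skip the no-op copy, e.g. r3 = r3
        std::printf("if (p%u) r%u = r%u\n", Pred, Dest, SrcTrue);
      if (Dest != SrcFalse)
        std::printf("if (!p%u) r%u = r%u\n", Pred, Dest, SrcFalse);
    }

    int main() { split(3, 0, 3, 6); return 0; } // emits only the !p0 transfer

The immediate form, TFR_condset_ii, always emits both transfers, since an immediate can never alias the destination register.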
+
+#define DEBUG_TYPE "xfer"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+    HexagonTargetMachine& QTM;
+    const HexagonSubtarget &QST;
+
+ public:
+    static char ID;
+    HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
+      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+    const char *getPassName() const {
+      return "Hexagon Split TFRCondSets";
+    }
+    bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+  const TargetInstrInfo *TII = QTM.getInstrInfo();
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+       MBBb != MBBe; ++MBBb) {
+    MachineBasicBlock* MBB = MBBb;
+    // Traverse the basic block.
+    for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+         ++MII) {
+      MachineInstr *MI = MII;
+      int Opc = MI->getOpcode();
+      if (Opc == Hexagon::TFR_condset_rr) {
+
+        int DestReg = MI->getOperand(0).getReg();
+        int SrcReg1 = MI->getOperand(2).getReg();
+        int SrcReg2 = MI->getOperand(3).getReg();
+
+        // Minor optimization: do not emit a predicated copy if the source
+        // and the destination are the same register.
+        if (DestReg != SrcReg1) {
+          BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt),
+                  DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+        }
+        if (DestReg != SrcReg2) {
+          BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt),
+                  DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+        }
+        MII = MBB->erase(MI);
+        --MII;
+      } else if (Opc == Hexagon::TFR_condset_ii) {
+        int DestReg = MI->getOperand(0).getReg();
+        // Operand 1 is the predicate that guards both transfers.
+        int PredReg = MI->getOperand(1).getReg();
+        int Immed1 = MI->getOperand(2).getImm();
+        int Immed2 = MI->getOperand(3).getImm();
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt),
+                DestReg).addReg(PredReg).addImm(Immed1);
+        BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt),
+                DestReg).addReg(PredReg).addImm(Immed2);
+        MII = MBB->erase(MI);
+        --MII;
+      }
+    }
+  }
+
+  return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+  return new HexagonSplitTFRCondSets(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 0000000..83fb498
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,59 @@
+//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableV3("enable-hexagon-v3", cl::Hidden,
+         cl::desc("Enable Hexagon V3 instructions."));
+
+static cl::opt<bool>
+EnableMemOps(
+    "enable-hexagon-memops",
+    cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
+    cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
+  HexagonGenSubtargetInfo(TT, CPU, FS),
+  HexagonArchVersion(V1),
+  CPUString(CPU.str()) {
+  ParseSubtargetFeatures(CPU, FS);
+
+  switch(HexagonArchVersion) {
+  case HexagonSubtarget::V2:
+    break;
+  case HexagonSubtarget::V3:
+    EnableV3 = true;
+    break;
+  case HexagonSubtarget::V4:
+    break;
+  default:
+    llvm_unreachable("Unknown Architecture Version.");
+  }
+
+  // Initialize scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(CPUString);
+
+  UseMemOps = EnableMemOps;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 0000000..6de85df
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,74 @@
+//==-- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+
+  bool UseMemOps;
+
+public:
+  enum HexagonArchEnum {
+    V1, V2, V3, V4
+  };
+
+  HexagonArchEnum HexagonArchVersion;
+  std::string CPUString;
+  InstrItineraryData InstrItins;
+
+public:
+  HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options. Definition of function is auto generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  bool hasV2TOps () const { return HexagonArchVersion >= V2; }
+  bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
+  bool hasV3TOps () const { return HexagonArchVersion >= V3; }
+  bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
+  bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+  bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+
+  bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+  const std::string &getCPUString () const { return CPUString; }
+
+  // Threshold for small data section.
+  unsigned getSmallDataThreshold() const {
+    return Hexagon_SMALL_DATA_THRESHOLD;
+  }
+  const HexagonArchEnum &getHexagonArchVersion() const {
+    return HexagonArchVersion;
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 0000000..b29e92c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,118 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHardwareLoops(
+    "disable-hexagon-hwloops", cl::Hidden,
+    cl::desc("Disable Hardware Loops for Hexagon target"));
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+  // Register the target.
+  RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           TargetOptions Options,
+                                           Reloc::Model RM,
+                                           CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0"),
+    Subtarget(TT, CPU, FS), TLInfo(*this), InstrInfo(Subtarget),
+    TSInfo(*this),
+    FrameLowering(Subtarget),
+    InstrItins(&Subtarget.getInstrItineraryData()) {
+  setMCUseCFI(false);
+}
+
+// addPassesForOptimizations - Allow the backend (target) to add Target
+// Independent Optimization passes to the Pass Manager.
+bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
+
+  PM.add(createConstantPropagationPass());
+  PM.add(createLoopSimplifyPass());
+  PM.add(createDeadCodeEliminationPass());
+  PM.add(createConstantPropagationPass());
+  PM.add(createLoopUnrollPass());
+  PM.add(createLoopStrengthReducePass(getTargetLowering()));
+  return true;
+}
+
+bool HexagonTargetMachine::addInstSelector(PassManagerBase &PM) {
+  PM.add(createHexagonRemoveExtendOps(*this));
+  PM.add(createHexagonISelDag(*this));
+  return false;
+}
+
+bool HexagonTargetMachine::addPreRegAlloc(PassManagerBase &PM) {
+  if (!DisableHardwareLoops) {
+    PM.add(createHexagonHardwareLoops());
+  }
+
+  return false;
+}
+
+bool HexagonTargetMachine::addPostRegAlloc(PassManagerBase &PM) {
+  PM.add(createHexagonCFGOptimizer(*this));
+  return true;
+}
+
+bool HexagonTargetMachine::addPreSched2(PassManagerBase &PM) {
+  PM.add(createIfConverterPass());
+  return true;
+}
+
+bool HexagonTargetMachine::addPreEmitPass(PassManagerBase &PM) {
+
+  if (!DisableHardwareLoops) {
+    PM.add(createHexagonFixupHwLoops());
+  }
+
+  // Expand Spill code for predicate registers.
+  PM.add(createHexagonExpandPredSpillCode(*this));
+
+  // Split up TFRcondsets into conditional transfers.
+  PM.add(createHexagonSplitTFRCondSets(*this));
+
+  return false;
+}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 0000000..e27d3ae
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,86 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETMACHINE_H
+#define HexagonTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "HexagonFrameLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class HexagonTargetMachine : public LLVMTargetMachine {
+  const TargetData DataLayout;       // Calculates type size & alignment.
+  HexagonSubtarget Subtarget;
+  HexagonTargetLowering TLInfo;
+  HexagonInstrInfo InstrInfo;
+  HexagonSelectionDAGInfo TSInfo;
+  HexagonFrameLowering FrameLowering;
+  const InstrItineraryData* InstrItins;
+
+public:
+  HexagonTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, TargetOptions Options, Reloc::Model RM,
+                       CodeModel::Model CM, CodeGenOpt::Level OL);
+
+  virtual const HexagonInstrInfo *getInstrInfo() const {
+    return &InstrInfo;
+  }
+  virtual const HexagonSubtarget *getSubtargetImpl() const {
+    return &Subtarget;
+  }
+  virtual const HexagonRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  virtual const InstrItineraryData* getInstrItineraryData() const {
+    return InstrItins;
+  }
+
+  virtual const HexagonTargetLowering* getTargetLowering() const {
+    return &TLInfo;
+  }
+
+  virtual const HexagonFrameLowering* getFrameLowering() const {
+    return &FrameLowering;
+  }
+
+  virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+  // Pass Pipeline Configuration.
+  virtual bool addPassesForOptimizations(PassManagerBase &PM);
+  virtual bool addInstSelector(PassManagerBase &PM);
+  virtual bool addPreEmitPass(PassManagerBase &PM);
+  virtual bool addPreRegAlloc(llvm::PassManagerBase &PM);
+  virtual bool addPostRegAlloc(PassManagerBase &PM);
+  virtual bool addPreSched2(PassManagerBase &PM);
+};
+
+extern bool flag_aligned_memcpy;
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 0000000..188337d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon Object File Handling --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonTargetObjectFile class, which places
+// suitable globals into the small data and small BSS sections.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+                                       cl::init(8), cl::Hidden);
+
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+                                         const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+  SmallDataSection =
+    getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+                               ELF::SHF_WRITE | ELF::SHF_ALLOC,
+                               SectionKind::getDataRel());
+  SmallBSSSection =
+    getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+                               ELF::SHF_WRITE | ELF::SHF_ALLOC,
+                               SectionKind::getBSS());
+}
+
+// sdata/sbss support taken largely from the MIPS backend.
+static bool IsInSmallSection(uint64_t Size) {
+  return Size > 0 && Size <= (uint64_t)SmallDataThreshold;
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into the small data/bss section.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+                                                const TargetMachine &TM) const {
+  // If the primary definition of this global value is outside the current
+  // translation unit or the global value is available for inspection but not
+  // emission, then do nothing.
+  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+    return false;
+
+  // Otherwise, check whether GV should go in sdata/sbss, based on the kind
+  // it would normally get from getKindForGlobal(GV, TM).
+  return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into the small data/bss section.
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+                       SectionKind Kind) const {
+  // Only global variables, not functions.
+  const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+  if (!GVA)
+    return false;
+
+  if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+    Type *Ty = GV->getType()->getElementType();
+    return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+  }
+
+  return false;
+}
+
+const MCSection *HexagonTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                       Mangler *Mang, const TargetMachine &TM) const {
+
+  // Handle Small Section classification here.
+  if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+    return SmallBSSSection;
+  if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+    return SmallDataSection;
+
+  // Otherwise, we work the same as ELF.
+  return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+}
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 0000000..101c1f2
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- HexagonTargetObjectFile.h - Hexagon Object File Handling -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETOBJECTFILE_H
+#define HexagonTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+  class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+    const MCSectionELF *SmallDataSection;
+    const MCSectionELF *SmallBSSSection;
+  public:
+    virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+    /// IsGlobalInSmallSection - Return true if this global address should be
+    /// placed into small data/bss section.
+    bool IsGlobalInSmallSection(const GlobalValue *GV,
+                                const TargetMachine &TM,
+                                SectionKind Kind) const;
+    bool IsGlobalInSmallSection(const GlobalValue *GV,
+                                const TargetMachine &TM) const;
+
+    const MCSection* SelectSectionForGlobal(const GlobalValue *GV,
+                                            SectionKind Kind,
+                                            Mangler *Mang,
+                                            const TargetMachine &TM) const;
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
new file mode 100644
index 0000000..21b2d67
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -0,0 +1,141 @@
+//==-- HexagonVarargsCallingConvention.h - Calling Conventions ---*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that assign locations to outgoing function
+// arguments. Adapted from the target independent version, but this one
+// handles calls to varargs functions.
+//
+//===----------------------------------------------------------------------===//
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+                                    EVT LocVT, CCValAssign::LocInfo LocInfo,
+                                    ISD::ArgFlagsTy ArgFlags,
+                                    Hexagon_CCState &State,
+                                    int NonVarArgsParams,
+                                    int CurrentParam,
+                                    bool ForceMem);
+
+static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+                                 EVT LocVT, CCValAssign::LocInfo LocInfo,
+                                 ISD::ArgFlagsTy ArgFlags,
+                                 Hexagon_CCState &State,
+                                 int NonVarArgsParams,
+                                 int CurrentParam,
+                                 bool ForceMem) {
+  unsigned ByValSize = 0;
+  if (ArgFlags.isByVal() &&
+      ((ByValSize = ArgFlags.getByValSize()) >
+       (MVT(MVT::i64).getSizeInBits() / 8))) {
+    ForceMem = true;
+  }
+
+  // Only assign registers for named (non-varargs) arguments.
+  if (!ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
+                                                 NonVarArgsParams))) {
+    if (LocVT == MVT::i32 ||
+        LocVT == MVT::i16 ||
+        LocVT == MVT::i8 ||
+        LocVT == MVT::f32) {
+      static const unsigned RegList1[] = {
+        Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+        Hexagon::R5
+      };
+      if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+                                         LocVT.getSimpleVT(), LocInfo));
+        return false;
+      }
+    }
+
+    if (LocVT == MVT::i64 ||
+        LocVT == MVT::f64) {
+      static const unsigned RegList2[] = {
+        Hexagon::D0, Hexagon::D1, Hexagon::D2
+      };
+      if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+        State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+                                         LocVT.getSimpleVT(), LocInfo));
+        return false;
+      }
+    }
+  }
+
+  const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+  unsigned Alignment =
+    State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+  unsigned Size =
+    State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+  // If it's passed by value, then we need the size of the aggregate, not of
+  // the pointer.
+  if (ArgFlags.isByVal()) {
+    Size = ByValSize;
+
+    // Hexagon_TODO: Get the alignment of the contained type here.
+    Alignment = 8;
+  }
+
+  unsigned Offset3 = State.AllocateStack(Size, Alignment);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+                                   LocVT.getSimpleVT(), LocInfo));
+  return false;
+}
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+                                    EVT LocVT, CCValAssign::LocInfo LocInfo,
+                                    ISD::ArgFlagsTy ArgFlags,
+                                    Hexagon_CCState &State,
+                                    int NonVarArgsParams,
+                                    int CurrentParam,
+                                    bool ForceMem) {
+
+  if (LocVT == MVT::i32 ||
+      LocVT == MVT::f32) {
+    static const unsigned RegList1[] = {
+      Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+      Hexagon::R5
+    };
+    if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+                                       LocVT.getSimpleVT(), LocInfo));
+      return false;
+    }
+  }
+
+  if (LocVT == MVT::i64 ||
+      LocVT == MVT::f64) {
+    static const unsigned RegList2[] = {
+      Hexagon::D0, Hexagon::D1, Hexagon::D2
+    };
+    if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+                                       LocVT.getSimpleVT(), LocInfo));
+      return false;
+    }
+  }
+
+  const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+  unsigned Alignment =
+    State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+  unsigned Size =
+    State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+  unsigned Offset3 = State.AllocateStack(Size, Alignment);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+                                   LocVT.getSimpleVT(), LocInfo));
+  return false;
+}
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
new file mode 100644
index 0000000..84ea6a0
--- /dev/null
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Hexagon/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+;                      The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = TargetInfo MCTargetDesc
+
+[component_0]
+type = TargetGroup
+name = Hexagon
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = HexagonCodeGen
+parent = Hexagon
+required_libraries = AsmPrinter CodeGen Core HexagonInfo SelectionDAG Support Target MC HexagonDesc
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..8e3da99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMHexagonDesc
+  HexagonMCTargetDesc.cpp
+  HexagonMCAsmInfo.cpp
+  )
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 0000000..188693c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,36 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
+  Data16bitsDirective = "\t.half\t";
+  Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = 0;  // The Hexagon assembler has no 64-bit directive.
+  CommentString = "//";
+  HasLEB128 = true;
+
+  PrivateGlobalPrefix = ".L";
+  LCOMMDirectiveType = LCOMM::ByteAlignment;
+  InlineAsmStart = "# InlineAsm Start";
+  InlineAsmEnd = "# InlineAsm End";
+  ZeroDirective = "\t.space\t";
+  AscizDirective = "\t.string\t";
+  WeakRefDirective = "\t.weak\t";
+
+  UsesELFSectionDirectiveForBSS = true;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 0000000..8196e95
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- HexagonMCAsmInfo.h - Hexagon asm properties -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMCASMINFO_H
+#define HexagonMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class Target;
+
+  class HexagonMCAsmInfo : public MCAsmInfo {
+  public:
+    explicit HexagonMCAsmInfo(const Target &T, StringRef TT);
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 0000000..625f07c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "HexagonMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createHexagonMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitHexagonMCInstrInfo(X);
+  return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+  MCRegisterInfo *X = new MCRegisterInfo();
+  InitHexagonMCRegisterInfo(X, Hexagon::R0);
+  return X;
+}
+
+static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
+                                                     StringRef CPU,
+                                                     StringRef FS) {
+  MCSubtargetInfo *X = new MCSubtargetInfo();
+  InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
+  return X;
+}
+
+static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) {
+  MCAsmInfo *MAI = new HexagonMCAsmInfo(T, TT);
+
+  // VirtualFP = (R30 + #0).
+  MachineLocation Dst(MachineLocation::VirtualFP);
+  MachineLocation Src(Hexagon::R30, 0);
+  MAI->addInitialFrameState(0, Dst, Src);
+
+  return MAI;
+}
+
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+                                                 CodeModel::Model CM,
+                                                 CodeGenOpt::Level OL) {
+  MCCodeGenInfo *X = new MCCodeGenInfo();
+  // For the time being, use static relocations, since there's really no
+  // support for PIC yet.
+  X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+  return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+  // Register the MC asm info.
+  RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+  // Register the MC codegen info.
+  TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+                                        createHexagonMCCodeGenInfo);
+
+  // Register the MC instruction info.
+  TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+                                      createHexagonMCInstrInfo);
+
+  // Register the MC register info.
+  TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+                                    createHexagonMCRegisterInfo);
+
+  // Register the MC subtarget info.
+  TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+                                          createHexagonMCSubtargetInfo);
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 0000000..364841f
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheHexagonTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..1114d99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+;                      The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonDesc
+parent = Hexagon
+required_libraries = HexagonInfo MC
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
new file mode 100644
index 0000000..67be2bc
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Hexagon/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
new file mode 100644
index 0000000..c936e92
--- /dev/null
+++ b/lib/Target/Hexagon/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License.
See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMHexagonCodeGen +TARGET = Hexagon + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = HexagonGenRegisterInfo.inc \ + HexagonGenInstrInfo.inc \ + HexagonGenAsmWriter.inc \ + HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ + HexagonGenCallingConv.inc \ + HexagonAsmPrinter.cpp + +DIRS = TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..5b04a30 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMHexagonInfo + HexagonTargetInfo.cpp + ) + +add_dependencies(LLVMHexagonInfo HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp new file mode 100644 index 0000000..7aa5dd3 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "llvm/Module.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheHexagonTarget; + +extern "C" void LLVMInitializeHexagonTargetInfo() { + RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon"); +} diff --git a/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..7b87be3 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Hexagon/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HexagonInfo +parent = Hexagon +required_libraries = MC Support +add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile new file mode 100644 index 0000000..494cca1 --- /dev/null +++ b/lib/Target/Hexagon/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Hexagon/TargetInfo/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMHexagonInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 358cbc8..5a42ca5 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = ARM CBackend CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore + ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the ; interpreter). diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt index ec8f52a..813767b 100644 --- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt +++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt @@ -6,11 +6,4 @@ add_llvm_library(LLVMMBlazeAsmParser MBlazeAsmParser.cpp ) -add_llvm_library_dependencies(LLVMMBlazeAsmParser - LLVMMBlazeInfo - LLVMMC - LLVMMCParser - LLVMSupport - ) - add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt index 2c61a7f..b10189a 100644 --- a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt +++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeAsmParser parent = MBlaze required_libraries = MBlazeInfo MC MCParser Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index d3f1383..71095e5 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -29,19 +29,6 @@ add_llvm_target(MBlazeCodeGen MBlazeELFWriterInfo.cpp ) -add_llvm_library_dependencies(LLVMMBlazeCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMBlazeAsmPrinter - LLVMMBlazeDesc - LLVMMBlazeInfo - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt index e0a53ee..be2dce1 100644 --- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt +++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt @@ -13,11 +13,4 @@ set_property( ) endif() -add_llvm_library_dependencies(LLVMMBlazeDisassembler - LLVMMBlazeDesc - LLVMMBlazeInfo - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt index c5c4f80..28dd9dc 100644 --- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt +++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeDisassembler parent = MBlaze required_libraries = MBlazeDesc MBlazeInfo MC Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 3087317..ccc3a05 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -123,6 +123,7 @@ static unsigned decodeSEXT(uint32_t insn) { case 0x41: return MBlaze::SRL; case 0x21: return MBlaze::SRC; case 0x01: return MBlaze::SRA; + case 0xE0: return MBlaze::CLZ; } } @@ -176,6 +177,13 @@ static unsigned decodeBR(uint32_t insn) { } static unsigned decodeBRI(uint32_t insn) { + switch (insn&0x3FFFFFF) { + default: break; + case 0x0020004: return MBlaze::IDMEMBAR; + case 0x0220004: return MBlaze::DMEMBAR; + case 0x0420004: 
return MBlaze::IMEMBAR; + } + switch ((insn>>16)&0x1F) { default: return UNSUPPORTED; case 0x00: return MBlaze::BRI; @@ -531,6 +539,9 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr, default: return Fail; + case MBlazeII::FC: + break; + case MBlazeII::FRRRR: if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) return Fail; @@ -547,6 +558,13 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr, instr.addOperand(MCOperand::CreateReg(RB)); break; + case MBlazeII::FRR: + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return Fail; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); + break; + case MBlazeII::FRI: switch (opcode) { default: diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt index aff0b3d..586e2d3 100644 --- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt +++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt @@ -5,9 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter MBlazeInstPrinter.cpp ) -add_llvm_library_dependencies(LLVMMBlazeAsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt index 7a21f1e..3a21a05 100644 --- a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt +++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeAsmPrinter parent = MBlaze required_libraries = MC Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index 570ab08..5297563 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -1,4 +1,4 @@ -//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===// +//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt index f1a3f5d..0b29007 100644 --- a/lib/Target/MBlaze/LLVMBuild.txt +++ b/lib/Target/MBlaze/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = MBlaze @@ -29,4 +32,3 @@ name = MBlazeCodeGen parent = MBlaze required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index ff051e3..c751dd8 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -310,9 +310,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); - while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + while (I != Pred->begin() && !(--I)->isTerminator()) ; // Noop - return I == Pred->end() || !I->getDesc().isBarrier(); + return I == Pred->end() || !I->isBarrier(); } // Force static initialization. 
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index c07570a..19e787d 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -29,13 +29,11 @@ using namespace llvm; STATISTIC(FilledSlots, "Number of delay slots filled"); -namespace llvm { -cl::opt<bool> DisableDelaySlotFiller( +static cl::opt<bool> MBDisableDelaySlotFiller( "disable-mblaze-delay-filler", cl::init(false), cl::desc("Disable the MBlaze delay slot filter."), cl::Hidden); -} namespace { struct Filler : public MachineFunctionPass { @@ -109,7 +107,6 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, // Hazard check MachineBasicBlock::iterator a = candidate; MachineBasicBlock::iterator b = slot; - MCInstrDesc desc = candidate->getDesc(); // MBB layout:- // candidate := a0 = operation(a1, a2) @@ -123,7 +120,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, // 4. b0 is one or more of {a1, a2} // 5. a accesses memory, and the middle bit // contains a store operation. - bool a_is_memory = desc.mayLoad() || desc.mayStore(); + bool a_is_memory = candidate->mayLoad() || candidate->mayStore(); // Determine the number of operands in the slot instruction and in the // candidate instruction. @@ -156,7 +153,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate, } // Check hazard type 5 - if (a_is_memory && m->getDesc().mayStore()) + if (a_is_memory && m->mayStore()) return true; } @@ -183,8 +180,8 @@ static bool isDelayFiller(MachineBasicBlock &MBB, if (candidate == MBB.begin()) return false; - MCInstrDesc brdesc = (--candidate)->getDesc(); - return (brdesc.hasDelaySlot()); + --candidate; + return (candidate->hasDelaySlot()); } static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) { @@ -211,9 +208,8 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) { break; --I; - MCInstrDesc desc = I->getDesc(); - if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) || - desc.isCall() || desc.isReturn() || desc.isBarrier() || + if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) || + I->isCall() || I->isReturn() || I->isBarrier() || hasUnknownSideEffects(I)) break; @@ -232,11 +228,11 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) { bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->getDesc().hasDelaySlot()) { + if (I->hasDelaySlot()) { MachineBasicBlock::iterator D = MBB.end(); MachineBasicBlock::iterator J = I; - if (!DisableDelaySlotFiller) + if (!MBDisableDelaySlotFiller) D = findDelayInstr(MBB,I); ++FilledSlots; diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index f28d5a7..37919bc 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -32,13 +32,11 @@ using namespace llvm; -namespace llvm { - cl::opt<bool> DisableStackAdjust( - "disable-mblaze-stack-adjust", - cl::init(false), - cl::desc("Disable MBlaze stack layout adjustment."), - cl::Hidden); -} +static cl::opt<bool> MBDisableStackAdjust( + "disable-mblaze-stack-adjust", + cl::init(false), + cl::desc("Disable MBlaze stack layout adjustment."), + cl::Hidden); static void replaceFrameIndexes(MachineFunction &MF, SmallVector<std::pair<int,int64_t>, 16> &FR) { @@ -85,7 +83,7 @@ static void replaceFrameIndexes(MachineFunction &MF, 
//===----------------------------------------------------------------------===// static void analyzeFrameIndexes(MachineFunction &MF) { - if (DisableStackAdjust) return; + if (MBDisableStackAdjust) return; MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); @@ -336,7 +334,8 @@ int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) // if frame pointer elimination is disabled. bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects(); } void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const { diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 148d906..0002174 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -167,7 +167,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index 54f605f..4c6034d 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -35,6 +35,7 @@ def FRIR : Format<17>; // RSUBI def FRRRR : Format<18>; // RSUB, FRSUB def FRI : Format<19>; // RSUB, FRSUB def FC : Format<20>; // NOP +def FRR : Format<21>; // CLZ //===----------------------------------------------------------------------===// // Describe MBlaze instructions format @@ -202,3 +203,26 @@ class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr, let Inst{11-16} = flags; let Inst{17-31} = imm15; } + +//===----------------------------------------------------------------------===// +// TCLZ instruction class in MBlaze : <|opcode|rd|imm15|> +//===----------------------------------------------------------------------===// +class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> { + bits<5> rd; + bits<5> ra; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = flags; +} + +//===----------------------------------------------------------------------===// +// MBAR instruction class in MBlaze : <|opcode|rd|imm15|> +//===----------------------------------------------------------------------===// +class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> { + let Inst{6-31} = flags; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 1d8c987..9fe2a49 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -594,9 +594,18 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, //===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in { - def 
NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>; + def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>; } +let Predicates=[HasPatCmp] in { + def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src), + "clz $dst, $src", [], IIC_ALU>; +} + +def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>; +def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>; +def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>; + let usesCustomInserter = 1 in { def Select_CC : MBlazePseudo<(outs GPR:$dst), (ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed @@ -751,6 +760,56 @@ def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>; def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>; // SET_CC operations +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 3)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 4)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 5)>; +def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU (i32 R0), GPR:$L), 6)>; + +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 3)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 4)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 5)>; +def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE), + (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), + (CMPU GPR:$R, (i32 R0)), 6)>; + def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), (CMP GPR:$R, GPR:$L), 1)>; @@ -787,6 +846,68 @@ def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)), (Select_CC GPR:$T, GPR:$F, 
GPR:$C, 2)>; // SELECT_CC +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETEQ), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETNE), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETGT), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETLT), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETGE), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETLE), + (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETUGT), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETULT), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETUGE), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>; +def : Pat<(selectcc (i32 GPR:$L), (i32 0), + (i32 GPR:$T), (i32 GPR:$F), SETULE), + (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>; + +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETEQ), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETNE), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETGT), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETLT), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETGE), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETLE), + (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETUGT), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETULT), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETUGE), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>; +def : Pat<(selectcc (i32 0), (i32 GPR:$R), + (i32 GPR:$T), (i32 GPR:$F), SETULE), + (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>; + def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R), (i32 GPR:$T), (i32 GPR:$F), SETEQ), (Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>; @@ -827,6 +948,48 @@ def : Pat<(br bb:$T), (BRID bb:$T)>; def : Pat<(brind GPR:$T), (BRAD GPR:$T)>; // BRCOND instructions +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T), + (BEQID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T), + (BNEID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T), + (BGTID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T), + (BLTID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T), + (BGEID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T), + (BLEID GPR:$L, bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T), + (BGTID (CMPU (i32 R0), 
GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T), + (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T), + (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>; +def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T), + (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>; + +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T), + (BEQID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T), + (BNEID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T), + (BGTID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T), + (BLTID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T), + (BGEID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T), + (BLEID GPR:$R, bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T), + (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T), + (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T), + (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>; +def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T), + (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>; + def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T), (BEQID (CMP GPR:$R, GPR:$L), bb:$T)>; def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T), @@ -869,11 +1032,11 @@ def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>; def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>; // 16-bit load and store -def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>; +def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>; def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>; // 8-bit load and store -def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>; +def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>; def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>; // Peepholes diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp index a7e400b..7e5598f 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp +++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp @@ -1,4 +1,4 @@ -//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===// +//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 4ad7bd6..5ed81dd 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -33,16 +33,16 @@ extern "C" void LLVMInitializeMBlazeTarget() { // an easier handling. 
MBlazeTargetMachine:: MBlazeTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL): - LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), - Subtarget(TT, CPU, FS), - DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS), + DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), + InstrInfo(*this), + FrameLowering(Subtarget), + TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) { } // Install an instruction selector pass using diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 1c1aa53..036f1b6 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -43,6 +43,7 @@ namespace llvm { public: MBlazeTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt index 37871b6..6fa7f43 100644 --- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt @@ -5,11 +5,4 @@ add_llvm_library(LLVMMBlazeDesc MBlazeMCTargetDesc.cpp ) -add_llvm_library_dependencies(LLVMMBlazeDesc - LLVMMBlazeAsmPrinter - LLVMMBlazeInfo - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt index e89811b..4982f0f 100644 --- a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeDesc parent = MBlaze required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support add_to_library_groups = MBlaze - diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp index 08f7d46..d5acbe9 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp @@ -58,6 +58,11 @@ public: bool MayNeedRelaxation(const MCInst &Inst) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; @@ -87,6 +92,18 @@ bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { return hasExprOrImm; } +bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME: Is this right? It's what the "generic" code was doing before, + // but is X86 specific. Is it actually true for MBlaze also, or was it + // just close enough to not be a big deal? + // + // Relax if the value is too big for a (signed) i8. 
+ return int64_t(Value) != int64_t(int8_t(Value)); +} + void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { Res = Inst; Res.setOpcode(getRelaxedOpcode(Inst.getOpcode())); diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h index 776dbc4..c8bdd6f 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h @@ -51,6 +51,7 @@ namespace MBlazeII { FRRRR, FRI, FC, + FRR, FormMask = 63 //===------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt index 93fce58..b554d9b 100644 --- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt +++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt @@ -5,10 +5,4 @@ add_llvm_library(LLVMMBlazeInfo MBlazeTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMMBlazeInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen) diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt index 938a1d9..ba7ee5d 100644 --- a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MBlazeInfo parent = MBlaze required_libraries = MC Support Target add_to_library_groups = MBlaze - diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 55c2d7d..7daa7a2 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -22,19 +22,6 @@ add_llvm_target(MSP430CodeGen MSP430MCInstLower.cpp ) -add_llvm_library_dependencies(LLVMMSP430CodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMMSP430AsmPrinter - LLVMMSP430Desc - LLVMMSP430Info - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt index ce39d95..64ac994 100644 --- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt +++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt @@ -4,9 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter MSP430InstPrinter.cpp ) -add_llvm_library_dependencies(LLVMMSP430AsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen) diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt index aeb863a..37b8c25 100644 --- a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt +++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MSP430AsmPrinter parent = MSP430 required_libraries = MC Support add_to_library_groups = MSP430 - diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt index 024312b..51d9702 100644 --- a/lib/Target/MSP430/LLVMBuild.txt +++ b/lib/Target/MSP430/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = MSP430 @@ -27,4 +30,3 @@ name = MSP430CodeGen parent = MSP430 required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target add_to_library_groups = MSP430 - diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt index c2dd448..adc95c5 100644 --- 
a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt @@ -3,12 +3,4 @@ add_llvm_library(LLVMMSP430Desc MSP430MCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMMSP430Desc - LLVMMC - LLVMMSP430AsmPrinter - LLVMMSP430Info - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMSP430Desc MSP430CommonTableGen) diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt index 1890e9d..3319d93 100644 --- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MSP430Desc parent = MSP430 required_libraries = MC MSP430AsmPrinter MSP430Info Support Target add_to_library_groups = MSP430 - diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index c99f4ab..e406ff2 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -29,7 +29,7 @@ using namespace llvm; bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (DisableFramePointerElim(MF) || + return (MF.getTarget().Options.DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } @@ -140,7 +140,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF, while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); - if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator()) + if (Opc != MSP430::POP16r && !PI->isTerminator()) break; --MBBI; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5c94137..884d69b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -122,8 +122,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::CTTZ, MVT::i8, Expand); setOperationAction(ISD::CTTZ, MVT::i16, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); setOperationAction(ISD::CTLZ, MVT::i8, Expand); setOperationAction(ISD::CTLZ, MVT::i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); setOperationAction(ISD::CTPOP, MVT::i8, Expand); setOperationAction(ISD::CTPOP, MVT::i16, Expand); diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 81f766e..9d3c7e9 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -158,13 +158,12 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { } bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isTerminator()) return false; + if (!MI->isTerminator()) return false; // Conditional branch is a special case. - if (MCID.isBranch() && !MCID.isBarrier()) + if (MI->isBranch() && !MI->isBarrier()) return true; - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return true; return !isPredicated(MI); } @@ -189,7 +188,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // A terminator that isn't a branch can't easily be handled // by this analysis. - if (!I->getDesc().isBranch()) + if (!I->isBranch()) return true; // Cannot handle indirect branches. 
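The MSP430ISelLowering change above, like the matching MBlaze and Mips hunks, marks the newly introduced ISD::CTLZ_ZERO_UNDEF and ISD::CTTZ_ZERO_UNDEF nodes as Expand. These variants count leading/trailing zeros under the promise that the input is non-zero, which matches what many native count instructions (and GCC/Clang's __builtin_clz) guarantee; Expand tells the legalizer to fall back to the fully defined CTLZ/CTTZ lowering on targets with no cheaper form. A hedged sketch of the semantic difference (the helper names are invented for illustration):

    #include <cstdint>

    // Plain CTLZ: the result is defined for every input, including zero.
    unsigned ctlz32(uint32_t x) {
      return x ? static_cast<unsigned>(__builtin_clz(x)) : 32u;
    }

    // CTLZ_ZERO_UNDEF: the caller guarantees x != 0, so the zero check
    // (and its branch) can be dropped; __builtin_clz(0) is undefined.
    unsigned ctlz32_zero_undef(uint32_t x) {
      return static_cast<unsigned>(__builtin_clz(x));
    }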
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index fe185fb..a0fc3da 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -28,9 +28,10 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 4fb060f..28d482a 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine { public: MSP430TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt index 1526946..f6b40ea 100644 --- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt +++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMMSP430Info MSP430TargetInfo.cpp ) -add_llvm_library_dependencies(LLVMMSP430Info - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMSP430Info MSP430CommonTableGen) diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt index a745ea8..deafc2d 100644 --- a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MSP430Info parent = MSP430 required_libraries = MC Support Target add_to_library_groups = MSP430 - diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index ac9cfc0..a13c0e8 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -29,19 +29,6 @@ add_llvm_target(MipsCodeGen MipsSelectionDAGInfo.cpp ) -add_llvm_library_dependencies(LLVMMipsCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMMipsAsmPrinter - LLVMMipsDesc - LLVMMipsInfo - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt index c45b35d..3e9fbf1 100644 --- a/lib/Target/Mips/InstPrinter/CMakeLists.txt +++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt @@ -4,9 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter MipsInstPrinter.cpp ) -add_llvm_library_dependencies(LLVMMipsAsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen) diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt index d953a61..317057b 100644 --- a/lib/Target/Mips/InstPrinter/LLVMBuild.txt +++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MipsAsmPrinter parent = Mips required_libraries = MC Support add_to_library_groups = Mips - diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index f544d39..3e9c46a 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -96,10 
+96,14 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { case MCSymbolRefExpr::VK_None: break; case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break; case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break; + case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break; case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break; case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break; case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break; case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break; + case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break; + case MCSymbolRefExpr::VK_Mips_DTPREL_HI:OS << "%dtprel_hi("; break; + case MCSymbolRefExpr::VK_Mips_DTPREL_LO:OS << "%dtprel_lo("; break; case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break; case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index e733b52..bcd32bc 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = Mips @@ -28,4 +31,3 @@ name = MipsCodeGen parent = Mips required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target add_to_library_groups = Mips - diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index 2ceb5c9..0eb0a55 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -5,11 +5,4 @@ add_llvm_library(LLVMMipsDesc MipsMCTargetDesc.cpp ) -add_llvm_library_dependencies(LLVMMipsDesc - LLVMMC - LLVMMipsAsmPrinter - LLVMMipsInfo - LLVMSupport - ) - add_dependencies(LLVMMipsDesc MipsCommonTableGen) diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt index d6f5dd2..29f5da6 100644 --- a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MipsDesc parent = Mips required_libraries = MC MipsAsmPrinter MipsInfo Support add_to_library_groups = Mips - diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 7bc5fe4..60ff4fe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -29,13 +29,19 @@ #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; +// Prepare value for the target space for it static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // Add/subtract and shift switch (Kind) { default: + return 0; + case FK_GPRel_4: + case FK_Data_4: + case Mips::fixup_Mips_LO16: break; case Mips::fixup_Mips_PC16: // So far we are only using this type for branches. @@ -52,25 +58,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { // address range. 
Value >>= 2; break; - } - - // Mask off value for placement as an operand - switch (Kind) { - default: - break; - case FK_GPRel_4: - case FK_Data_4: - Value &= 0xffffffff; - break; - case Mips::fixup_Mips_26: - Value &= 0x03ffffff; - break; - case Mips::fixup_Mips_LO16: - case Mips::fixup_Mips_PC16: - Value &= 0x0000ffff; - break; case Mips::fixup_Mips_HI16: - Value >>= 16; + case Mips::fixup_Mips_GOT_Local: + // Get the higher 16-bits. Also add 1 if bit 15 is 1. + Value = (Value >> 16) + ((Value & 0x8000) != 0); break; } @@ -96,42 +87,40 @@ public: /// fixup kind as appropriate. void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { - unsigned Kind = (unsigned)Fixup.getKind(); - Value = adjustFixupValue(Kind, Value); + MCFixupKind Kind = Fixup.getKind(); + Value = adjustFixupValue((unsigned)Kind, Value); if (!Value) - return; // Doesn't change encoding. + return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); - switch (Kind) { - default: - llvm_unreachable("Unknown fixup kind!"); - case Mips::fixup_Mips_GOT16: // This will be fixed up at link time - break; - case FK_GPRel_4: - case FK_Data_4: - case Mips::fixup_Mips_26: - case Mips::fixup_Mips_LO16: - case Mips::fixup_Mips_PC16: - case Mips::fixup_Mips_HI16: - // For each byte of the fragment that the fixup touches, mask in - // the fixup value. The Value has been "split up" into the appropriate - // bitfields above. - for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes - Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff); - break; + // FIXME: The below code will not work across endian models + // How many bytes/bits are we fixing up? + unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1; + uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1; + + // Grab current value, if any, from bits. + uint64_t CurVal = 0; + for (unsigned i = 0; i != NumBytes; ++i) + CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8); + + CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask); + + // Write out the bytes back to the code/data bits. + // First the unaffected bits and then the fixup. + for (unsigned i = 0; i != NumBytes; ++i) { + Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff); } - } +} unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = { - // This table *must* be in the order that the fixup_* kinds are defined in + // This table *must* be in the same order as the fixup_* kinds in // MipsFixupKinds.h. // // name offset bits flags - { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 0, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, @@ -140,7 +129,8 @@ public: { "fixup_Mips_LO16", 0, 16, 0 }, { "fixup_Mips_GPREL16", 0, 16, 0 }, { "fixup_Mips_LITERAL", 0, 16, 0 }, - { "fixup_Mips_GOT16", 0, 16, 0 }, + { "fixup_Mips_GOT_Global", 0, 16, 0 }, + { "fixup_Mips_GOT_Local", 0, 16, 0 }, { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_Mips_CALL16", 0, 16, 0 }, { "fixup_Mips_GPREL32", 0, 32, 0 }, @@ -173,6 +163,17 @@ public: return false; } + /// fixupNeedsRelaxation - Target specific predicate for whether a given + /// fixup requires the associated instruction to be relaxed. + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented"); + return false; + } + /// RelaxInstruction - Relax the instruction in the given fragment /// to the next wider instruction. /// diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index cebfde0..00fc5df 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -31,8 +31,9 @@ namespace MipsII { MO_NO_FLAG, - /// MO_GOT - Represents the offset into the global offset table at which + /// MO_GOT16 - Represents the offset into the global offset table at which /// the address the relocation entry symbol resides during execution. + MO_GOT16, MO_GOT, /// MO_GOT_CALL - Represents the offset into the global offset table at @@ -55,6 +56,13 @@ namespace MipsII { // Dynamic TLS). MO_TLSGD, + /// MO_TLSLDM - Represents the offset into the global offset table at which + // the module ID and TLS block offset reside during execution (Local + // Dynamic TLS). + MO_TLSLDM, + MO_DTPREL_HI, + MO_DTPREL_LO, + /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial // Exec TLS). MO_GOTTPREL, @@ -180,6 +188,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) case Mips::D14: return 28; case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + case Mips::HWR29: return 29; case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: case Mips::D15: diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index 20890ed..a56c002 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -14,74 +14,82 @@ namespace llvm { namespace Mips { - enum Fixups { - // fixup_Mips_xxx - R_MIPS_NONE - fixup_Mips_NONE = FirstTargetFixupKind, + // Although most of the current fixup types reflect a unique relocation, + // one can have multiple fixup types for a given relocation and thus they + // need to be uniquely named. + // + // This table *must* be in the same order as + // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] + // in MipsAsmBackend.cpp. + // + enum Fixups { + // Branch fixups resulting in R_MIPS_16. + fixup_Mips_16 = FirstTargetFixupKind, - // fixup_Mips_xxx - R_MIPS_16. - fixup_Mips_16, + // Pure 32 bit data fixup resulting in - R_MIPS_32. + fixup_Mips_32, - // fixup_Mips_xxx - R_MIPS_32. - fixup_Mips_32, + // Full 32 bit relative data fixup resulting in - R_MIPS_REL32. + fixup_Mips_REL32, - // fixup_Mips_xxx - R_MIPS_REL32. - fixup_Mips_REL32, + // Jump 26 bit fixup resulting in - R_MIPS_26. + fixup_Mips_26, - // fixup_Mips_xxx - R_MIPS_26. - fixup_Mips_26, + // Pure upper 16 bit fixup resulting in - R_MIPS_HI16. + fixup_Mips_HI16, - // fixup_Mips_xxx - R_MIPS_HI16. - fixup_Mips_HI16, + // Pure lower 16 bit fixup resulting in - R_MIPS_LO16. + fixup_Mips_LO16, - // fixup_Mips_xxx - R_MIPS_LO16. - fixup_Mips_LO16, + // 16 bit fixup for GP offset resulting in - R_MIPS_GPREL16. + fixup_Mips_GPREL16, - // fixup_Mips_xxx - R_MIPS_GPREL16. - fixup_Mips_GPREL16, + // 16 bit literal fixup resulting in - R_MIPS_LITERAL. + fixup_Mips_LITERAL, - // fixup_Mips_xxx - R_MIPS_LITERAL. - fixup_Mips_LITERAL, + // Global symbol fixup resulting in - R_MIPS_GOT16. + fixup_Mips_GOT_Global, - // fixup_Mips_xxx - R_MIPS_GOT16. - fixup_Mips_GOT16, + // Local symbol fixup resulting in - R_MIPS_GOT16. + fixup_Mips_GOT_Local, - // fixup_Mips_xxx - R_MIPS_PC16. - fixup_Mips_PC16, + // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16, - // fixup_Mips_xxx - R_MIPS_CALL16. - fixup_Mips_CALL16, + // resulting in - R_MIPS_CALL16. + fixup_Mips_CALL16, - // fixup_Mips_xxx - R_MIPS_GPREL32. - fixup_Mips_GPREL32, + // resulting in - R_MIPS_GPREL32. + fixup_Mips_GPREL32, - // fixup_Mips_xxx - R_MIPS_SHIFT5. - fixup_Mips_SHIFT5, + // resulting in - R_MIPS_SHIFT5. + fixup_Mips_SHIFT5, - // fixup_Mips_xxx - R_MIPS_SHIFT6. - fixup_Mips_SHIFT6, + // resulting in - R_MIPS_SHIFT6. + fixup_Mips_SHIFT6, - // fixup_Mips_xxx - R_MIPS_64. - fixup_Mips_64, + // Pure 64 bit data fixup resulting in - R_MIPS_64. + fixup_Mips_64, - // fixup_Mips_xxx - R_MIPS_TLS_GD. - fixup_Mips_TLSGD, + // resulting in - R_MIPS_TLS_GD. + fixup_Mips_TLSGD, - // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL. - fixup_Mips_GOTTPREL, + // resulting in - R_MIPS_TLS_GOTTPREL. + fixup_Mips_GOTTPREL, - // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16. - fixup_Mips_TPREL_HI, + // resulting in - R_MIPS_TLS_TPREL_HI16. + fixup_Mips_TPREL_HI, - // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16. - fixup_Mips_TPREL_LO, + // resulting in - R_MIPS_TLS_TPREL_LO16. + fixup_Mips_TPREL_LO, - // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16 - fixup_Mips_Branch_PCRel, + // PC relative branch fixup resulting in - R_MIPS_PC16 + fixup_Mips_Branch_PCRel, - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind - }; + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind + }; } // namespace Mips } // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 0c3cbb3..463dcfe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -194,8 +194,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_GOT_CALL: FixupKind = Mips::fixup_Mips_CALL16; break; + case MCSymbolRefExpr::VK_Mips_GOT16: + FixupKind = Mips::fixup_Mips_GOT_Global; + break; case MCSymbolRefExpr::VK_Mips_GOT: - FixupKind = Mips::fixup_Mips_GOT16; + FixupKind = Mips::fixup_Mips_GOT_Local; break; case MCSymbolRefExpr::VK_Mips_ABS_HI: FixupKind = Mips::fixup_Mips_HI16; @@ -245,8 +248,8 @@ unsigned MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { assert(MI.getOperand(OpNo).isImm()); - unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); - return szEncoding - 1; + unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + return SizeEncoding - 1; } // FIXME: should be called getMSBEncoding @@ -256,10 +259,10 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { assert(MI.getOperand(OpNo-1).isImm()); assert(MI.getOperand(OpNo).isImm()); - unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups); - unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups); + unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); - return pos + sz - 1; + return Position + Size - 1; } #include "MipsGenMCCodeEmitter.inc" diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 39c2c16..e9e0f60 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -79,9 +79,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion", class Proc<string Name, list<SubtargetFeature> 
Features> : Processor<Name, MipsGenericItineraries, Features>; -def : Proc<"mips32r1", [FeatureMips32]>; -def : Proc<"4ke", [FeatureMips32r2]>; -def : Proc<"mips64r1", [FeatureMips64]>; +def : Proc<"mips32", [FeatureMips32]>; +def : Proc<"mips32r2", [FeatureMips32r2]>; +def : Proc<"mips64", [FeatureMips64]>; def : Proc<"mips64r2", [FeatureMips64r2]>; def MipsAsmWriter : AsmWriter { diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index b0fb4fa..2996986 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -25,7 +25,7 @@ def uimm16_64 : Operand<i64> { // Transformation Function - get Imm - 32. def Subtract32 : SDNodeXForm<imm, [{ - return getI32Imm((unsigned)N->getZExtValue() - 32); + return getImm(N, (unsigned)N->getZExtValue() - 32); }]>; // shamt field must fit in 5 bits. @@ -36,6 +36,19 @@ def imm32_63 : ImmLeaf<i32, [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}], Subtract32>; +// Is a 32-bit int. +def immSExt32 : ImmLeaf<i64, [{return isInt<32>(Imm);}]>; + +// Transformation Function - get the higher 16 bits. +def HIGHER : SDNodeXForm<imm, [{ + return getImm(N, (N->getZExtValue() >> 32) & 0xFFFF); +}]>; + +// Transformation Function - get the highest 16 bits. +def HIGHEST : SDNodeXForm<imm, [{ + return getImm(N, (N->getZExtValue() >> 48) & 0xFFFF); +}]>; + //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// @@ -206,6 +219,17 @@ let Uses = [SP_64] in def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, Requires<[IsN64]>; +def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; + +def DEXT : ExtBase<3, "dext", CPU64Regs>; +def DINS : InsBase<7, "dins", CPU64Regs>; + +def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "dsll32\t$rd, $rt, 0", [], IIAlu>; + +def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -216,9 +240,15 @@ def : Pat<(i64 immSExt16:$in), def : Pat<(i64 immZExt16:$in), (ORi64 ZERO_64, imm:$in)>; +// 32-bit immediates +def : Pat<(i64 immSExt32:$imm), + (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; + // Arbitrary immediates def : Pat<(i64 imm:$imm), - (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; + (ORi64 (DSLL (ORi64 (DSLL (ORi64 (LUi64 (HIGHEST imm:$imm)), + (HIGHER imm:$imm)), 16), (HI16 imm:$imm)), 16), + (LO16 imm:$imm))>; // extended loads let Predicates = [NotN64] in { @@ -236,11 +266,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>; def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>; def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>; +def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>; def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>; def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>; def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>; def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>; +def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>; def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)), (DADDiu 
CPU64Regs:$hi, tglobaladdr:$lo)>; @@ -250,6 +282,15 @@ def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)), (DADDiu CPU64Regs:$hi, tjumptable:$lo)>; def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)), (DADDiu CPU64Regs:$hi, tconstpool:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>; + +def : WrapperPat<tglobaladdr, DADDiu, GP_64>; +def : WrapperPat<tconstpool, DADDiu, GP_64>; +def : WrapperPat<texternalsym, DADDiu, GP_64>; +def : WrapperPat<tblockaddress, DADDiu, GP_64>; +def : WrapperPat<tjumptable, DADDiu, GP_64>; +def : WrapperPat<tglobaltlsaddr, DADDiu, GP_64>; defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64, ZERO_64>; @@ -268,3 +309,6 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>; def : Pat<(i32 (trunc CPU64Regs:$src)), (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>; +// 32-to-64-bit extension +def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>; +def : Pat<(i64 (zext CPURegs:$src)), (DSRL32 (DSLL64_32 CPURegs:$src), 0)>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index d27e3ab..a5505d3 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -96,19 +96,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (!OutStreamer.hasRawTextSupport()) { // Lower CPLOAD and CPRESTORE - if (Opc == Mips::CPLOAD) { + if (Opc == Mips::CPLOAD) MCInstLowering.LowerCPLOAD(MI, MCInsts); - for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I - != MCInsts.end(); ++I) + else if (Opc == Mips::CPRESTORE) + MCInstLowering.LowerCPRESTORE(MI, MCInsts); + + if (!MCInsts.empty()) { + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); + I != MCInsts.end(); ++I) OutStreamer.EmitInstruction(*I); return; } - - if (Opc == Mips::CPRESTORE) { - MCInstLowering.LowerCPRESTORE(MI, TmpInst0); - OutStreamer.EmitInstruction(TmpInst0); - return; - } } OutStreamer.EmitInstruction(TmpInst0); @@ -317,9 +315,9 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* // Otherwise, check the last instruction. // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); - while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ; + while (I != Pred->begin() && !(--I)->isTerminator()) ; - return !I->getDesc().isBarrier(); + return !I->isBarrier(); } // Print out an operand for an inline asm expression. 
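The new "arbitrary immediates" pattern in the Mips64InstrInfo.td hunk above builds a full 64-bit constant out of four 16-bit slices using lui/ori/dsll; the HI16/LO16/HIGHER/HIGHEST transforms pick the slices. A rough sketch of the recombination (helper names are mine, not LLVM's; lui's sign extension is harmless here because the stray upper bits are shifted out):

    #include <cstdint>

    static uint16_t lo16(uint64_t v)    { return v & 0xFFFF; }         // LO16
    static uint16_t hi16(uint64_t v)    { return (v >> 16) & 0xFFFF; } // HI16
    static uint16_t higher(uint64_t v)  { return (v >> 32) & 0xFFFF; } // HIGHER
    static uint16_t highest(uint64_t v) { return (v >> 48) & 0xFFFF; } // HIGHEST

    // Mirrors the selected sequence:
    //   lui $r, highest ; ori $r, higher ; dsll $r, $r, 16
    //   ori $r, hi16    ; dsll $r, $r, 16 ; ori $r, lo16
    uint64_t materialize64(uint64_t imm) {
      uint64_t r = static_cast<uint64_t>(highest(imm)) << 16; // lui
      r |= higher(imm);                                       // ori
      r <<= 16;                                               // dsll 16
      r |= hi16(imm);                                         // ori
      r <<= 16;                                               // dsll 16
      r |= lo16(imm);                                         // ori
      return r;                                               // r == imm
    }

The companion immSExt32 pattern short-circuits this to a two-instruction lui/ori pair when the constant already fits in a sign-extended 32 bits.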
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index a8f29ae..6b26e24 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -144,7 +144,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB){ MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) emitInstruction(*I); } @@ -161,7 +161,7 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, if (Form == MipsII::FrmJ) return Mips::reloc_mips_26; if ((Form == MipsII::FrmI || Form == MipsII::FrmFI) - && MI.getDesc().isBranch()) + && MI.isBranch()) return Mips::reloc_mips_branch; if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi) return Mips::reloc_mips_hi; diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index be3b7a0..1d9e9b0 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -96,7 +96,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) { LastFiller = MBB.end(); for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->getDesc().hasDelaySlot()) { + if (I->hasDelaySlot()) { ++FilledSlots; Changed = true; @@ -146,7 +146,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, || I->isInlineAsm() || I->isLabel() || FI == LastFiller - || I->getDesc().isPseudo() + || I->isPseudo() // // Should not allow: // ERET, DERET or WAIT, PAUSE. Need to add these to instruction @@ -174,16 +174,15 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, if (candidate->isImplicitDef() || candidate->isKill()) return true; - MCInstrDesc MCID = candidate->getDesc(); // Loads or stores cannot be moved past a store to the delay slot // and stores cannot be moved past a load. - if (MCID.mayLoad()) { + if (candidate->mayLoad()) { if (sawStore) return true; sawLoad = true; } - if (MCID.mayStore()) { + if (candidate->mayStore()) { if (sawStore) return true; sawStore = true; @@ -191,7 +190,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, return true; } - assert((!MCID.isCall() && !MCID.isReturn()) && + assert((!candidate->isCall() && !candidate->isReturn()) && "Cannot put calls or returns in delay slot."); for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) { @@ -221,11 +220,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI, SmallSet<unsigned, 32>& RegUses) { // If MI is a call or return, just examine the explicit non-variadic operands. MCInstrDesc MCID = MI->getDesc(); - unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() : - MI->getNumOperands(); + unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() : + MI->getNumOperands(); // Add RA to RegDefs to prevent users of RA from going into delay slot. - if (MCID.isCall()) + if (MI->isCall()) RegDefs.insert(Mips::RA); for (unsigned i = 0; i != e; ++i) { diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 36aef99..2466545 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -85,8 +85,8 @@ using namespace llvm; // if frame pointer elimination is disabled. 
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() - || MFI->isFrameAddressTaken(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } bool MipsFrameLowering::targetHandlesStackFrameRounding() const { diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 9c831ed..b17239d 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -86,10 +86,9 @@ private: // Complex Pattern. bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset); - // getI32Imm - Return a target constant with the specified - // value, of type i32. - inline SDValue getI32Imm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, unsigned Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); } virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -122,21 +121,16 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { } // on PIC code Load GA - if (TM.getRelocationModel() == Reloc::PIC_) { - if (Addr.getOpcode() == MipsISD::WrapperPIC) { - Base = CurDAG->getRegister(GPReg, ValTy); - Offset = Addr.getOperand(0); - return true; - } - } else { + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = CurDAG->getRegister(GPReg, ValTy); + Offset = Addr.getOperand(0); + return true; + } + + if (TM.getRelocationModel() != Reloc::PIC_) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; - else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) { - Base = CurDAG->getRegister(GPReg, ValTy); - Offset = Addr; - return true; - } } // Addresses of the form FI+const or FI|const @@ -310,13 +304,24 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { } case MipsISD::ThreadPointer: { - unsigned SrcReg = Mips::HWR29; - unsigned DestReg = Mips::V1; - SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(), - Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32)); + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; + } + + SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), CurDAG->getRegister(SrcReg, PtrVT)); SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); ReplaceUses(SDValue(Node, 0), ResNode); return ResNode.getNode(); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b5a15cf..c9b657c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -40,11 +40,11 @@ using namespace llvm; // mask (Pos), and return true. // For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { - if (!isUInt<32>(I) || !isShiftedMask_32(I)) + if (!isShiftedMask_64(I)) return false; - Size = CountPopulation_32(I); - Pos = CountTrailingZeros_32(I); + Size = CountPopulation_64(I); + Pos = CountTrailingZeros_64(I); return true; } @@ -54,9 +54,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Hi: return "MipsISD::Hi"; case MipsISD::Lo: return "MipsISD::Lo"; case MipsISD::GPRel: return "MipsISD::GPRel"; - case MipsISD::TlsGd: return "MipsISD::TlsGd"; - case MipsISD::TprelHi: return "MipsISD::TprelHi"; - case MipsISD::TprelLo: return "MipsISD::TprelLo"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; @@ -72,7 +69,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::DivRemU: return "MipsISD::DivRemU"; case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; - case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC"; + case MipsISD::Wrapper: return "MipsISD::Wrapper"; case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; case MipsISD::Sync: return "MipsISD::Sync"; case MipsISD::Ext: return "MipsISD::Ext"; @@ -129,7 +126,9 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); @@ -157,6 +156,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); @@ -555,20 +558,20 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, return SDValue(); SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1); - + unsigned ShiftRightOpc = ShiftRight.getOpcode(); + // Op's first operand must be a shift right. - if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL) + if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL) return SDValue(); // The second operand of the shift must be an immediate. - uint64_t Pos; ConstantSDNode *CN; if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1)))) return SDValue(); - Pos = CN->getZExtValue(); - + uint64_t Pos = CN->getZExtValue(); uint64_t SMPos, SMSize; + // Op's second operand must be a shifted mask. if (!(CN = dyn_cast<ConstantSDNode>(Mask)) || !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize)) @@ -576,10 +579,11 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, // Return if the shifted mask does not start at bit 0 or the sum of its size // and Pos exceeds the word's size. 
- if (SMPos != 0 || Pos + SMSize > 32) + EVT ValTy = N->getValueType(0); + if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits()) return SDValue(); - return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32, + return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy, ShiftRight.getOperand(0), DAG.getConstant(Pos, MVT::i32), DAG.getConstant(SMSize, MVT::i32)); @@ -630,10 +634,11 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG, // Return if the shift amount and the first bit position of mask are not the // same. - if (Shamt != SMPos0) + EVT ValTy = N->getValueType(0); + if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) return SDValue(); - return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32, + return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy, Shl.getOperand(0), DAG.getConstant(SMPos0, MVT::i32), DAG.getConstant(SMSize0, MVT::i32), @@ -1485,9 +1490,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, (GV->hasLocalLinkage() && !isa<Function>(GV))); unsigned GotFlag = IsN64 ? (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : - MipsII::MO_GOT; + (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16); SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); - GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA); + GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GA); SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA, MachinePointerInfo(), false, false, false, 0); @@ -1523,7 +1528,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); - BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset); + BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, BAGOTOffset); SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag); SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset, @@ -1535,9 +1540,9 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - // If the relocation model is PIC, use the General Dynamic TLS Model, - // otherwise use the Initial Exec or Local Exec TLS Model. - // TODO: implement Local Dynamic TLS model + // If the relocation model is PIC, use the General Dynamic TLS Model or + // Local Dynamic TLS model, otherwise use the Initial Exec or + // Local Exec TLS Model. GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); DebugLoc dl = GA->getDebugLoc(); @@ -1546,45 +1551,59 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { // General Dynamic TLS Model - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, - 0, MipsII::MO_TLSGD); - SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA); - SDValue GP = DAG.getRegister(Mips::GP, MVT::i32); - SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd); + bool LocalDynamic = GV->hasInternalLinkage(); + unsigned Flag = LocalDynamic ? 
MipsII::MO_TLSLDM :MipsII::MO_TLSGD; + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag); + SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA); + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + + SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); + Entry.Ty = PtrTy; Args.push_back(Entry); + std::pair<SDValue, SDValue> CallResult = - LowerCallTo(DAG.getEntryNode(), - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, - dl); - - return CallResult.first; + LowerCallTo(DAG.getEntryNode(), PtrTy, + false, false, false, false, 0, CallingConv::C, false, true, + TlsGetAddr, Args, DAG, dl); + + SDValue Ret = CallResult.first; + + if (!LocalDynamic) + return Ret; + + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + MipsII::MO_DTPREL_HI); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + MipsII::MO_DTPREL_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo); + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret); + return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo); } SDValue Offset; if (GV->isDeclaration()) { // Initial Exec TLS Model - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_GOTTPREL); - Offset = DAG.getLoad(MVT::i32, dl, + TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA); + Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), TGA, MachinePointerInfo(), false, false, false, 0); } else { // Local Exec TLS Model - SDVTList VTs = DAG.getVTList(MVT::i32); - SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_TPREL_HI); - SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, MipsII::MO_TPREL_LO); - SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1); - SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo); - Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo); + Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); } SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT); @@ -1594,34 +1613,29 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const SDValue MipsTargetLowering:: LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - SDValue ResNode; - SDValue HiPart; + SDValue HiPart, JTI, JTILo; // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - unsigned char OpFlag = IsPIC ? 
MipsII::MO_GOT : MipsII::MO_ABS_HI; - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); - - if (!IsPIC) { - SDValue Ops[] = { JTI }; - HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); + if (!IsPIC && !IsN64) { + JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI); + HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI); + JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO); } else {// Emit Load from Global Pointer - JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI); - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, - MachinePointerInfo(), - false, false, false, 0); + unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag); + JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, JTI); + HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI, + MachinePointerInfo(), false, false, false, 0); + JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag); } - SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - MipsII::MO_ABS_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo); - ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - - return ResNode; + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo); + return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo); } SDValue MipsTargetLowering:: @@ -1657,7 +1671,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), N->getOffset(), GOTFlag); - CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP); + CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, CP); SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), false, false, false, 0); @@ -1685,21 +1699,29 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV), false, false, 0); } - -static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) { + +// Called if the size of integer registers is large enough to hold the whole +// floating point number. +static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) { // FIXME: Use ext/ins instructions if target architecture is Mips32r2. 
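LowerFCOPYSIGNLargeIntReg, whose body follows, applies whenever the whole floating-point value fits in one integer register (f32 everywhere, f64 once HasMips64): clear the sign bit of the magnitude operand, keep only the sign bit of the sign operand, and OR the two. A standalone model of the f64 case, with std::memcpy standing in for the ISD::BITCAST nodes:

#include <cassert>
#include <cstdint>
#include <cstring>

// copysign(x, y) in a 64-bit GPR, mirroring LowerFCOPYSIGNLargeIntReg:
// Mask = 1 << 63; result = (bits(x) & (Mask - 1)) | (bits(y) & Mask).
static double copySignViaGPR(double X, double Y) {
  uint64_t XBits, YBits;
  std::memcpy(&XBits, &X, sizeof(X));   // the ISD::BITCAST f64 -> i64
  std::memcpy(&YBits, &Y, sizeof(Y));
  const uint64_t Mask = 1ULL << 63;
  uint64_t Result = (XBits & (Mask - 1)) | (YBits & Mask);
  double R;
  std::memcpy(&R, &Result, sizeof(R));  // and back: i64 -> f64
  return R;
}

int main() {
  assert(copySignViaGPR(3.5, -1.0) == -3.5);
  assert(copySignViaGPR(-3.5, 1.0) == 3.5);
  return 0;
}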
+ EVT ValTy = Op.getValueType(); + EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits()); + uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1); DebugLoc dl = Op.getDebugLoc(); - SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1)); - SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0, - DAG.getConstant(0x7fffffff, MVT::i32)); - SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1, - DAG.getConstant(0x80000000, MVT::i32)); - SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result); + SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1)); + SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0, + DAG.getConstant(Mask - 1, IntValTy)); + SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1, + DAG.getConstant(Mask, IntValTy)); + SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1); + return DAG.getNode(ISD::BITCAST, dl, ValTy, Result); } -static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) { +// Called if the size of integer registers is not large enough to hold the whole +// floating point number (e.g. f64 & 32-bit integer register). +static SDValue +LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) { // FIXME: // Use ext/ins instructions if target architecture is Mips32r2. // Eliminate redundant mfc1 and mtc1 instructions. @@ -1734,10 +1756,10 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) assert(Ty == MVT::f32 || Ty == MVT::f64); - if (Ty == MVT::f32) - return LowerFCOPYSIGN32(Op, DAG); + if (Ty == MVT::f32 || HasMips64) + return LowerFCOPYSIGNLargeIntReg(Op, DAG); else - return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle()); + return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle()); } SDValue MipsTargetLowering:: @@ -2328,7 +2350,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // node so that legalize doesn't hack it. 
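The LowerCall hunk that follows renames LoadSymAddr to GlobalOrExternal and, more importantly, moves the copy-to-$t9 out of the PIC-only path: $t9 (register 25, the call register) must hold the callee's address both for PIC calls, which are emitted as jalr $25, and for indirect calls through a function pointer, where there is no symbol to jump to directly. The added condition, reduced to a one-line sketch:

// Mirrors the "if (IsPICCall || !GlobalOrExternal)" guard added in the
// hunk below: copy the callee address into $t9 for PIC calls and for
// indirect calls (no global or external symbol as the call target).
inline bool needsT9Copy(bool IsPICCall, bool GlobalOrExternal) {
  return IsPICCall || !GlobalOrExternal;
}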
unsigned char OpFlag; bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 - bool LoadSymAddr = false; + bool GlobalOrExternal = false; SDValue CalleeLo; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { @@ -2345,7 +2367,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, getPointerTy(), 0, OpFlag); } - LoadSymAddr = true; + GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { if (IsN64 || (!IsO32 && IsPIC)) @@ -2356,16 +2378,16 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, OpFlag = MipsII::MO_GOT_CALL; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlag); - LoadSymAddr = true; + GlobalOrExternal = true; } SDValue InFlag; // Create nodes that load address of callee and copy it to T9 if (IsPICCall) { - if (LoadSymAddr) { + if (GlobalOrExternal) { // Load callee address - Callee = DAG.getNode(MipsISD::WrapperPIC, dl, getPointerTy(), Callee); + Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(), Callee); SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, false, 0); @@ -2377,7 +2399,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, } else Callee = LoadValue; } + } + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { // copy to T9 unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index f2b64e3..81d093f 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -40,13 +40,6 @@ namespace llvm { // Handle gp_rel (small data/bss sections) relocation. GPRel, - // General Dynamic TLS - TlsGd, - - // Local Exec TLS - TprelHi, - TprelLo, - // Thread Pointer ThreadPointer, @@ -79,7 +72,7 @@ namespace llvm { BuildPairF64, ExtractElementF64, - WrapperPIC, + Wrapper, DynAlloc, diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index e1725fa..21a1862 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -115,7 +115,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, let Inst{15-0} = imm16; } -class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr, +class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI> { diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 5358dc0..ea101f7 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -29,8 +29,8 @@ using namespace llvm; MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()), - RI(*TM.getSubtargetImpl(), *this) {} - + RI(*TM.getSubtargetImpl(), *this), + UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? 
Mips::B : Mips::J) {} const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { return RI; @@ -236,7 +236,8 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) { Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || + Opc == Mips::J) ? Opc : 0; } @@ -320,7 +321,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If there is only one terminator instruction, process it. if (!SecondLastOpc) { // Unconditional branch - if (LastOpc == Mips::J) { + if (LastOpc == UncondBrOpc) { TBB = LastInst->getOperand(0).getMBB(); return false; } @@ -337,7 +338,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If second to last instruction is an unconditional branch, // analyze it and remove the last instruction. - if (SecondLastOpc == Mips::J) { + if (SecondLastOpc == UncondBrOpc) { // Return if the last instruction cannot be removed. if (!AllowModify) return true; @@ -349,7 +350,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Conditional branch followed by an unconditional branch. // The last one must be unconditional. - if (LastOpc != Mips::J) + if (LastOpc != UncondBrOpc) return true; AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); @@ -391,14 +392,14 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Two-way Conditional branch. if (FBB) { BuildCondBr(MBB, TBB, DL, Cond); - BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); + BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB); return 2; } // One way branch. // Unconditional branch. if (Cond.empty()) - BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); + BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB); else // Conditional branch. BuildCondBr(MBB, TBB, DL, Cond); return 1; diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 8fa3052..70cc2cf 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -34,6 +34,7 @@ class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; bool IsN64; const MipsRegisterInfo RI; + unsigned UncondBrOpc; public: explicit MipsInstrInfo(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 0ae94ab..9fcc5fd 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -107,7 +107,7 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, // movn %got(d)($gp), %got(c)($gp), $4 // This instruction is illegal since movn can take only register operands. -def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>; +def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntUnaryOp>; // Pointer to dynamically allocated stack area. def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc, @@ -132,6 +132,8 @@ def NotMips64 : Predicate<"!Subtarget.hasMips64()">; def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">; def IsN64 : Predicate<"Subtarget.isABI_N64()">; def NotN64 : Predicate<"!Subtarget.isABI_N64()">; +def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; +def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. 
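Stepping back to the MipsInstrInfo hunks above: the new UncondBrOpc field caches which unconditional branch the compiler may emit. J encodes an absolute 26-bit (region-based) target, which is only safe when code addresses are fixed at static link time, while B is a PC-relative 16-bit branch and therefore position-independent; AnalyzeBranch and InsertBranch now consult the cached opcode instead of hard-coding Mips::J, and the RelocStatic/RelocPIC predicates below gate the two instruction definitions the same way. The decision itself, as a sketch with illustrative enum names (not the LLVM ones):

enum RelocModel { Static, PIC };
enum BrOpcode { J /* absolute jump */, B /* pc-relative branch */ };

// Mirrors the UncondBrOpc initialization in the MipsInstrInfo constructor.
inline BrOpcode uncondBrOpc(RelocModel RM) {
  return RM == PIC ? B : J;
}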
@@ -194,12 +196,12 @@ def size_ins : Operand<i32> { // Transformation Function - get the lower 16 bits. def LO16 : SDNodeXForm<imm, [{ - return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF); + return getImm(N, N->getZExtValue() & 0xFFFF); }]>; // Transformation Function - get the higher 16 bits. def HI16 : SDNodeXForm<imm, [{ - return getI32Imm((unsigned)N->getZExtValue() >> 16); + return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF); }]>; // Node immediate fits as 16-bit sign extended on target immediate. @@ -380,21 +382,13 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, let isPseudo = Pseudo; } -// Memory Load/Store +// Unaligned Memory Load/Store let canFoldAsLoad = 1 in -class LoadX<bits<6> op, RegisterClass RC, - Operand MemOpnd>: - FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), - "", - [], IILoad> { -} +class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>: + FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {} -class StoreX<bits<6> op, RegisterClass RC, - Operand MemOpnd>: - FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), - "", - [], IIStore> { -} +class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>: + FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {} // 32-bit load. multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, @@ -415,10 +409,10 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, } // 32-bit load. -multiclass LoadX32<bits<6> op> { - def #NAME# : LoadX<op, CPURegs, mem>, +multiclass LoadUnAlign32<bits<6> op> { + def #NAME# : LoadUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; - def _P8 : LoadX<op, CPURegs, mem64>, + def _P8 : LoadUnAlign<op, CPURegs, mem64>, Requires<[IsN64]>; } // 32-bit store. @@ -440,18 +434,18 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, } // 32-bit store. 
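The LO16/HI16 transforms at the top of the hunk above now go through the value-type-aware getImm and, crucially, HI16 masks its result to 16 bits. That matters once these transforms can see 64-bit immediates: for a value wider than 32 bits, (Imm >> 16) alone would still carry bits 32 and up. A quick standalone check of both halves:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Imm = 0x12345678;
  assert((Imm & 0xFFFF) == 0x5678);          // LO16
  assert(((Imm >> 16) & 0xFFFF) == 0x1234);  // HI16

  // For a 64-bit immediate the mask on HI16 is what isolates the field:
  uint64_t Imm64 = 0xABCD000012345678ULL;
  assert(((Imm64 >> 16) & 0xFFFF) == 0x1234);            // masked: correct
  assert((Imm64 >> 16) == 0x0000ABCD00001234ULL);        // unmasked: stray bits
  return 0;
}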
-multiclass StoreX32<bits<6> op> { - def #NAME# : StoreX<op, CPURegs, mem>, +multiclass StoreUnAlign32<bits<6> op> { + def #NAME# : StoreUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; - def _P8 : StoreX<op, CPURegs, mem64>, + def _P8 : StoreUnAlign<op, CPURegs, mem64>, Requires<[IsN64]>; } // Conditional Branch class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: - CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16), - !strconcat(instr_asm, "\t$rs, $rt, $imm16"), - [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> { + BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16), + !strconcat(instr_asm, "\t$rs, $rt, $imm16"), + [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -459,9 +453,9 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op, RegisterClass RC>: - CBranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16), - !strconcat(instr_asm, "\t$rs, $imm16"), - [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> { + BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16), + !strconcat(instr_asm, "\t$rs, $imm16"), + [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> { let rt = _rt; let isBranch = 1; let isTerminator = 1; @@ -485,11 +479,29 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od, [(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))], IIAlu>; -// Unconditional branch -let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in +// Jump class JumpFJ<bits<6> op, string instr_asm>: FJ<op, (outs), (ins jmptarget:$target), - !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>; + !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> { + let isBranch=1; + let isTerminator=1; + let isBarrier=1; + let hasDelaySlot = 1; + let Predicates = [RelocStatic]; +} + +// Unconditional branch +class UncondBranch<bits<6> op, string instr_asm>: + BranchBase<op, (outs), (ins brtarget:$imm16), + !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> { + let rs = 0; + let rt = 0; + let isBranch = 1; + let isTerminator = 1; + let isBarrier = 1; + let hasDelaySlot = 1; + let Predicates = [RelocPIC]; +} let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, isIndirectBranch = 1 in @@ -616,21 +628,37 @@ class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>: } // Read Hardware -class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd), - "rdhwr\t$rt, $rd", [], IIAlu> { +class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass> + : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd), + "rdhwr\t$rt, $rd", [], IIAlu> { let rs = 0; let shamt = 0; } // Ext and Ins -class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins, - list<dag> pattern, InstrItinClass itin>: - FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), - pattern, itin>, Requires<[HasMips32r2]> { +class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>: + FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz), + !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), + [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> { bits<5> pos; bits<5> sz; let rd = sz; let shamt = pos; + let Predicates = [HasMips32r2]; +} + +class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>: + FR<0x1f, _funct, (outs RC:$rt), + (ins RC:$rs, 
uimm16:$pos, size_ins:$sz, RC:$src), + !strconcat(instr_asm, " $rt, $rs, $pos, $sz"), + [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))], + NoItinerary> { + bits<5> pos; + bits<5> sz; + let rd = sz; + let shamt = pos; + let Predicates = [HasMips32r2]; + let Constraints = "$src = $rt"; } // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). @@ -795,10 +823,10 @@ defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; defm USW : StoreM32<0x2b, "usw", store_u, 1>; /// Primitives for unaligned -defm LWL : LoadX32<0x22>; -defm LWR : LoadX32<0x26>; -defm SWL : StoreX32<0x2A>; -defm SWR : StoreX32<0x2E>; +defm LWL : LoadUnAlign32<0x22>; +defm LWR : LoadUnAlign32<0x26>; +defm SWL : StoreUnAlign32<0x2A>; +defm SWR : StoreUnAlign32<0x2E>; let hasSideEffects = 1 in def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", @@ -822,6 +850,7 @@ def J : JumpFJ<0x02, "j">; def JR : JumpFR<0x00, 0x08, "jr", CPURegs>; def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr">; +def B : UncondBranch<0x04, "b">; def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; def BNE : CBranch<0x05, "bne", setne, CPURegs>; def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>; @@ -888,21 +917,10 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>; def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>, Requires<[HasMips32]>; -def RDHWR : ReadHardware; - -def EXT : ExtIns<0, "ext", (outs CPURegs:$rt), - (ins CPURegs:$rs, uimm16:$pos, size_ext:$sz), - [(set CPURegs:$rt, - (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))], - NoItinerary>; +def RDHWR : ReadHardware<CPURegs, HWRegs>; -let Constraints = "$src = $rt" in -def INS : ExtIns<4, "ins", (outs CPURegs:$rt), - (ins CPURegs:$rs, uimm16:$pos, size_ins:$sz, CPURegs:$src), - [(set CPURegs:$rt, - (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz, - CPURegs:$src))], - NoItinerary>; +def EXT : ExtBase<0, "ext", CPURegs>; +def INS : InsBase<4, "ins", CPURegs>; //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions @@ -939,11 +957,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; +def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; +def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; @@ -953,6 +973,8 @@ def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), (ADDiu CPURegs:$hi, tjumptable:$lo)>; def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), (ADDiu CPURegs:$hi, tconstpool:$lo)>; +def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)), + (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; // gp_rel relocs def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), @@ -960,26 +982,17 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)), def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)), (ADDiu CPURegs:$gp, tconstpool:$in)>; -// tlsgd -def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)), - (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>; - 
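ExtBase and InsBase above give the MIPS32r2 bit-field instructions register-class-parameterized definitions, and the PerformANDCombine/PerformORCombine hunks earlier are what turn shift-and-mask DAG patterns into these nodes. Their semantics as a standalone bit-level model (assuming 0 < sz and pos + sz <= 32; the function names are mine):

#include <cassert>
#include <cstdint>

// ext rt, rs, pos, sz  ==>  rt = (rs >> pos) & ((1 << sz) - 1)
static uint32_t mipsExt(uint32_t Rs, unsigned Pos, unsigned Sz) {
  return (Rs >> Pos) & ((1u << Sz) - 1);
}

// ins rt, rs, pos, sz  ==>  field [pos, pos+sz) of rt := low sz bits of rs.
// Rt is both source and destination, matching the "$src = $rt" constraint.
static uint32_t mipsIns(uint32_t Rt, uint32_t Rs, unsigned Pos, unsigned Sz) {
  uint32_t Mask = ((1u << Sz) - 1) << Pos;
  return (Rt & ~Mask) | ((Rs << Pos) & Mask);
}

int main() {
  assert(mipsExt(0x003ff800, 11, 11) == 0x7ff);  // the shifted-mask example
  assert(mipsIns(0xffffffff, 0, 4, 8) == 0xfffff00f);
  return 0;
}

The new tglobaltlsaddr patterns just above are what the hi/lo TLS sequences select. For reference, the four access models LowerGlobalTLSAddress now implements, in my paraphrase of the lowering code (flags are the MipsII operand flags used in those hunks):

// General Dynamic (PIC):          addr = __tls_get_addr(Wrapper(GA[TLSGD]))
// Local Dynamic (PIC, internal):  base = __tls_get_addr(Wrapper(GA[TLSLDM]))
//                                 addr = base + Hi(GA[DTPREL_HI])
//                                             + Lo(GA[DTPREL_LO])
// Initial Exec (non-PIC, extern): addr = ThreadPointer
//                                        + load(Wrapper(GA[GOTTPREL]))
// Local Exec (non-PIC, local):    addr = ThreadPointer + Hi(GA[TPREL_HI])
//                                                      + Lo(GA[TPREL_LO])

// Any access to a thread-local variable (GNU __thread syntax) lowers
// through one of the sequences above:
__thread int TlsCounter;
int bumpTlsCounter() { return ++TlsCounter; }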
-// tprel hi/lo -def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>; -def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>; -def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)), - (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>; - // wrapper_pic -class WrapperPICPat<SDNode node>: - Pat<(MipsWrapperPIC node:$in), - (ADDiu GP, node:$in)>; - -def : WrapperPICPat<tglobaladdr>; -def : WrapperPICPat<tconstpool>; -def : WrapperPICPat<texternalsym>; -def : WrapperPICPat<tblockaddress>; -def : WrapperPICPat<tjumptable>; +class WrapperPat<SDNode node, Instruction ADDiuOp, Register GPReg>: + Pat<(MipsWrapper node:$in), + (ADDiuOp GPReg, node:$in)>; + +def : WrapperPat<tglobaladdr, ADDiu, GP>; +def : WrapperPat<tconstpool, ADDiu, GP>; +def : WrapperPat<texternalsym, ADDiu, GP>; +def : WrapperPat<tblockaddress, ADDiu, GP>; +def : WrapperPat<tjumptable, ADDiu, GP>; +def : WrapperPat<tglobaltlsaddr, ADDiu, GP>; // Mips does not have "not", so we expand our way def : Pat<(not CPURegs:$in), diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 6fc2af1..23486d3 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -41,10 +41,14 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break; case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break; case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break; + case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break; case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break; case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break; case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break; case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break; + case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break; + case MipsII::MO_DTPREL_HI:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break; + case MipsII::MO_DTPREL_LO:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break; case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break; case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break; case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break; @@ -136,14 +140,35 @@ void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI, } // Lower ".cprestore offset" to "sw $gp, offset($sp)". 
-void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) { - OutMI.clear(); - OutMI.setOpcode(Mips::SW); - OutMI.addOperand(MCOperand::CreateReg(Mips::GP)); - OutMI.addOperand(MCOperand::CreateReg(Mips::SP)); +void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, + SmallVector<MCInst, 4>& MCInsts) { const MachineOperand &MO = MI->getOperand(0); assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); - OutMI.addOperand(MCOperand::CreateImm(MO.getImm())); + unsigned Offset = MO.getImm(), Reg = Mips::SP; + MCInst Sw; + + if (Offset >= 0x8000) { + unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0); + Offset &= 0xffff; + Reg = Mips::AT; + + // lui at,hi + // addu at,at,sp + MCInsts.resize(2); + MCInsts[0].setOpcode(Mips::LUi); + MCInsts[0].addOperand(MCOperand::CreateReg(Mips::AT)); + MCInsts[0].addOperand(MCOperand::CreateImm(Hi)); + MCInsts[1].setOpcode(Mips::ADDu); + MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); + MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); + MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP)); + } + + Sw.setOpcode(Mips::SW); + Sw.addOperand(MCOperand::CreateReg(Mips::GP)); + Sw.addOperand(MCOperand::CreateReg(Reg)); + Sw.addOperand(MCOperand::CreateImm(Offset)); + MCInsts.push_back(Sw); } MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 98e37e4..1490c14 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -36,7 +36,7 @@ public: MipsAsmPrinter &asmprinter); void Lower(const MachineInstr *MI, MCInst &OutMI) const; void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); - void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI); + void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerUnalignedLoadStore(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); private: diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 06c4a66..e5a0f08 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -125,6 +125,7 @@ getRegisterNumbering(unsigned RegEnum) case Mips::D14: return 28; case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64: + case Mips::HWR29: return 29; case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64: case Mips::D15: diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 925ad9e..76ee2e6 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -239,6 +239,7 @@ let Namespace = "Mips" in { // Hardware register $29 def HWR29 : Register<"29">; + def HWR29_64 : Register<"29">; } //===----------------------------------------------------------------------===// @@ -301,3 +302,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> { // Hardware registers def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; +def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; + diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 016d449..dc299f2 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -31,7 +31,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, { std::string CPUName = CPU; if (CPUName.empty()) - CPUName = "mips32r1"; + CPUName = "mips32"; // Parse features string. 
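The LowerCPRESTORE rewrite above handles frame offsets that no longer fit SW's signed 16-bit immediate: it builds the address in AT with lui/addu and keeps only the low 16 bits in the store. Because the hardware sign-extends that low half, the high part is rounded up whenever bit 15 of the offset is set. The arithmetic, checked standalone:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Offset = 0x12348000u;  // bit 15 set, so Hi needs the +1 adjustment
  uint32_t Hi = (Offset >> 16) + ((Offset & 0x8000) != 0);  // 0x1235
  int32_t Lo = (int16_t)(Offset & 0xffff);  // sign-extends to -0x8000, as the
                                            // SW immediate does in hardware
  // lui at, Hi ; addu at, at, sp ; sw gp, Lo(at)
  // reconstructs exactly sp + Offset:
  assert((uint32_t)((Hi << 16) + Lo) == Offset);
  return 0;
}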
ParseSubtargetFeatures(CPUName, FS); diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 5d6b24f..02887fa 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -34,51 +34,51 @@ extern "C" void LLVMInitializeMipsTarget() { // Using CodeModel::Large enables different CALL behavior. MipsTargetMachine:: MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, - bool isLittle): - LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle), - DataLayout(isLittle ? - (Subtarget.isABI_N64() ? - "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : - "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : - (Subtarget.isABI_N64() ? - "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : - "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), JITInfo() { + bool isLittle) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, isLittle), + DataLayout(isLittle ? + (Subtarget.isABI_N64() ? + "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") : + (Subtarget.isABI_N64() ? + "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : + "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), + InstrInfo(*this), + FrameLowering(Subtarget), + TLInfo(*this), TSInfo(*this), JITInfo() { } MipsebTargetMachine:: MipsebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} MipselTargetMachine:: MipselTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} Mips64ebTargetMachine:: Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} Mips64elTargetMachine:: Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} + CodeGenOpt::Level OL) + : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. 
@@ -120,4 +120,3 @@ bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, PM.add(createMipsJITCodeEmitterPass(*this, JCE)); return false; } - diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index e40d9e2..6842373 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -38,7 +38,7 @@ namespace llvm { public: MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); @@ -82,7 +82,7 @@ namespace llvm { class MipsebTargetMachine : public MipsTargetMachine { public: MipsebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -92,7 +92,7 @@ public: class MipselTargetMachine : public MipsTargetMachine { public: MipselTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -103,6 +103,7 @@ class Mips64ebTargetMachine : public MipsTargetMachine { public: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -113,6 +114,7 @@ class Mips64elTargetMachine : public MipsTargetMachine { public: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt index 5692604..4172d00 100644 --- a/lib/Target/Mips/TargetInfo/CMakeLists.txt +++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMMipsInfo MipsTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMMipsInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMMipsInfo MipsCommonTableGen) diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt index 90ae260..2d42568 100644 --- a/lib/Target/Mips/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = MipsInfo parent = Mips required_libraries = MC Support Target add_to_library_groups = Mips - diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index 6709c1b..a9f4330 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -25,20 +25,6 @@ add_llvm_target(PTXCodeGen PTXTargetMachine.cpp ) -add_llvm_library_dependencies(LLVMPTXCodeGen - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMPTXDesc - LLVMPTXInfo - LLVMSelectionDAG - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) - add_subdirectory(TargetInfo) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt index 029d060..b252893 100644 --- a/lib/Target/PTX/InstPrinter/CMakeLists.txt +++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt @@ -6,8 +6,3 @@ add_llvm_library(LLVMPTXAsmPrinter add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen) -add_llvm_library_dependencies(LLVMPTXAsmPrinter - LLVMMC - LLVMSupport - ) - diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt 
b/lib/Target/PTX/InstPrinter/LLVMBuild.txt index be89c10..af5d200 100644 --- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt +++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PTXAsmPrinter parent = PTX required_libraries = MC Support add_to_library_groups = PTX - diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index 2f6c92d..5fecb85 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -38,7 +38,50 @@ StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { } void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); + // Decode the register number into type and offset + unsigned RegSpace = RegNo & 0x7; + unsigned RegType = (RegNo >> 3) & 0x7; + unsigned RegOffset = RegNo >> 6; + + // Print the register + OS << "%"; + + switch (RegSpace) { + default: + llvm_unreachable("Unknown register space!"); + case PTXRegisterSpace::Reg: + switch (RegType) { + default: + llvm_unreachable("Unknown register type!"); + case PTXRegisterType::Pred: + OS << "p"; + break; + case PTXRegisterType::B16: + OS << "rh"; + break; + case PTXRegisterType::B32: + OS << "r"; + break; + case PTXRegisterType::B64: + OS << "rd"; + break; + case PTXRegisterType::F32: + OS << "f"; + break; + case PTXRegisterType::F64: + OS << "fd"; + break; + } + break; + case PTXRegisterSpace::Return: + OS << "ret"; + break; + case PTXRegisterSpace::Argument: + OS << "arg"; + break; + } + + OS << RegOffset; } void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, @@ -139,6 +182,8 @@ void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { O << "0000000000000000"; } + } else if (Op.isReg()) { + printRegName(O, Op.getReg()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); const MCExpr *Expr = Op.getExpr(); diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt index 22c70de..15a1eb5 100644 --- a/lib/Target/PTX/LLVMBuild.txt +++ b/lib/Target/PTX/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = PTX @@ -27,4 +30,3 @@ name = PTXCodeGen parent = PTX required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils add_to_library_groups = PTX - diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt index 94dbcee..d1fd74c 100644 --- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt @@ -3,11 +3,4 @@ add_llvm_library(LLVMPTXDesc PTXMCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMPTXDesc - LLVMMC - LLVMPTXAsmPrinter - LLVMPTXInfo - LLVMSupport - ) - add_dependencies(LLVMPTXDesc PTXCommonTableGen) diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt index fff21c1..19b80c5 100644 --- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PTXDesc parent = PTX required_libraries = MC PTXAsmPrinter PTXInfo Support add_to_library_groups = PTX - diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h index c6094be..77a298d 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h +++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h @@ -17,6 +17,8 @@ 
#ifndef PTXBASEINFO_H #define PTXBASEINFO_H +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "PTXMCTargetDesc.h" namespace llvm { @@ -57,6 +59,75 @@ namespace llvm { RndPosInfInt = 10 // .rpi }; } // namespace PTXII + + namespace PTXRegisterType { + // Register type encoded in MCOperands + enum { + Pred = 0, + B16, + B32, + B64, + F32, + F64 + }; + } // namespace PTXRegisterType + + namespace PTXRegisterSpace { + // Register space encoded in MCOperands + enum { + Reg = 0, + Local, + Param, + Argument, + Return + }; + } + + inline static void decodeRegisterName(raw_ostream &OS, + unsigned EncodedReg) { + OS << "%"; + + unsigned RegSpace = EncodedReg & 0x7; + unsigned RegType = (EncodedReg >> 3) & 0x7; + unsigned RegOffset = EncodedReg >> 6; + + switch (RegSpace) { + default: + llvm_unreachable("Unknown register space!"); + case PTXRegisterSpace::Reg: + switch (RegType) { + default: + llvm_unreachable("Unknown register type!"); + case PTXRegisterType::Pred: + OS << "p"; + break; + case PTXRegisterType::B16: + OS << "rh"; + break; + case PTXRegisterType::B32: + OS << "r"; + break; + case PTXRegisterType::B64: + OS << "rd"; + break; + case PTXRegisterType::F32: + OS << "f"; + break; + case PTXRegisterType::F64: + OS << "fd"; + break; + } + break; + case PTXRegisterSpace::Return: + OS << "ret"; + break; + case PTXRegisterSpace::Argument: + OS << "arg"; + break; + } + + OS << RegOffset; + } } // namespace llvm #endif diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index bdf238b..77ed71d 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -51,23 +51,23 @@ using namespace llvm; static const char PARAM_PREFIX[] = "__param_"; static const char RETURN_PREFIX[] = "__ret_"; -static const char *getRegisterTypeName(unsigned RegNo, - const MachineRegisterInfo& MRI) { - const TargetRegisterClass *TRC = MRI.getRegClass(RegNo); - -#define TEST_REGCLS(cls, clsstr) \ - if (PTX::cls ## RegisterClass == TRC) return # clsstr; - - TEST_REGCLS(RegPred, pred); - TEST_REGCLS(RegI16, b16); - TEST_REGCLS(RegI32, b32); - TEST_REGCLS(RegI64, b64); - TEST_REGCLS(RegF32, b32); - TEST_REGCLS(RegF64, b64); -#undef TEST_REGCLS - - llvm_unreachable("Not in any register class!"); - return NULL; +static const char *getRegisterTypeName(unsigned RegType) { + switch (RegType) { + default: + llvm_unreachable("Unknown register type"); + case PTXRegisterType::Pred: + return ".pred"; + case PTXRegisterType::B16: + return ".b16"; + case PTXRegisterType::B32: + return ".b32"; + case PTXRegisterType::B64: + return ".b64"; + case PTXRegisterType::F32: + return ".f32"; + case PTXRegisterType::F64: + return ".f64"; + } } static const char *getStateSpaceName(unsigned addressSpace) { @@ -188,32 +188,32 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { unsigned numRegs; // pred - numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .pred %p<" << numRegs << ">;\n"; // i16 - numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .b16 %rh<" << numRegs << ">;\n"; // i32 - numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .b32 %r<" << numRegs << ">;\n"; // i64 - numRegs = 
MFI->getNumRegistersForClass(PTX::RegI64RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .b64 %rd<" << numRegs << ">;\n"; // f32 - numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .f32 %f<" << numRegs << ">;\n"; // f64 - numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass); + numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg); if(numRegs > 0) os << "\t.reg .f64 %fd<" << numRegs << ">;\n"; @@ -368,7 +368,6 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { const PTXParamManager &PM = MFI->getParamManager(); const bool isKernel = MFI->isKernel(); const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); - const MachineRegisterInfo& MRI = MF->getRegInfo(); SmallString<128> decl; raw_svector_ostream os(decl); @@ -391,7 +390,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { if (i != b) os << ", "; - os << ".reg ." << getRegisterTypeName(*i, MRI) << ' ' + os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' << MFI->getRegisterName(*i); } } @@ -450,7 +449,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { if (i != b) os << ", "; - os << ".reg ." << getRegisterTypeName(*i, MRI) << ' ' + os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' ' << MFI->getRegisterName(*i); } } @@ -521,20 +520,18 @@ MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { MCOperand MCOp; const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); - const MCExpr *Expr; - const char *RegSymbolName; + unsigned EncodedReg; switch (MO.getType()) { default: llvm_unreachable("Unknown operand type"); case MachineOperand::MO_Register: - // We create register operands as symbols, since the PTXInstPrinter class - // has no way to map virtual registers back to a name without some ugly - // hacks. - // FIXME: Figure out a better way to handle virtual register naming. - RegSymbolName = MFI->getRegisterName(MO.getReg()); - Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None, - OutContext); - MCOp = MCOperand::CreateExpr(Expr); + if (MO.getReg() > 0) { + // Encode the register + EncodedReg = MFI->getEncodedRegister(MO.getReg()); + } else { + EncodedReg = 0; + } + MCOp = MCOperand::CreateReg(EncodedReg); break; case MachineOperand::MO_Immediate: MCOp = MCOperand::CreateImm(MO.getImm()); diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp index 0b653e0..a21d172 100644 --- a/lib/Target/PTX/PTXFPRoundingModePass.cpp +++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp @@ -23,9 +23,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + // NOTE: PTXFPRoundingModePass should be executed just before emission. -namespace llvm { +namespace { /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to /// all FP instructions. 
Essentially, this pass just looks for all FP /// instructions that have a rounding mode set to RndDefault, and sets an @@ -58,7 +60,7 @@ namespace llvm { void initializeMap(); void processInstruction(MachineInstr &MI); }; // class PTXFPRoundingModePass -} // namespace llvm +} // end anonymous namespace using namespace llvm; diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index 17191fb..a012297 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -243,6 +243,30 @@ SDValue PTXTargetLowering:: for (unsigned i = 0, e = Ins.size(); i != e; ++i) { EVT RegVT = Ins[i].VT; TargetRegisterClass* TRC = getRegClassFor(RegVT); + unsigned RegType; + + // Determine which register class we need + if (RegVT == MVT::i1) { + RegType = PTXRegisterType::Pred; + } + else if (RegVT == MVT::i16) { + RegType = PTXRegisterType::B16; + } + else if (RegVT == MVT::i32) { + RegType = PTXRegisterType::B32; + } + else if (RegVT == MVT::i64) { + RegType = PTXRegisterType::B64; + } + else if (RegVT == MVT::f32) { + RegType = PTXRegisterType::F32; + } + else if (RegVT == MVT::f64) { + RegType = PTXRegisterType::F64; + } + else { + llvm_unreachable("Unknown parameter type"); + } // Use a unique index in the instruction to prevent instruction folding. // Yes, this is a hack. @@ -253,7 +277,7 @@ SDValue PTXTargetLowering:: InVals.push_back(ArgValue); - MFI->addArgReg(Reg); + MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument); } } @@ -304,25 +328,32 @@ SDValue PTXTargetLowering:: for (unsigned i = 0, e = Outs.size(); i != e; ++i) { EVT RegVT = Outs[i].VT; TargetRegisterClass* TRC = 0; + unsigned RegType; // Determine which register class we need if (RegVT == MVT::i1) { TRC = PTX::RegPredRegisterClass; + RegType = PTXRegisterType::Pred; } else if (RegVT == MVT::i16) { TRC = PTX::RegI16RegisterClass; + RegType = PTXRegisterType::B16; } else if (RegVT == MVT::i32) { TRC = PTX::RegI32RegisterClass; + RegType = PTXRegisterType::B32; } else if (RegVT == MVT::i64) { TRC = PTX::RegI64RegisterClass; + RegType = PTXRegisterType::B64; } else if (RegVT == MVT::f32) { TRC = PTX::RegF32RegisterClass; + RegType = PTXRegisterType::F32; } else if (RegVT == MVT::f64) { TRC = PTX::RegF64RegisterClass; + RegType = PTXRegisterType::F64; } else { llvm_unreachable("Unknown parameter type"); @@ -335,7 +366,7 @@ SDValue PTXTargetLowering:: Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg); - MFI->addRetReg(Reg); + MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return); } } diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 1b947a5..871b3a7 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -116,7 +116,7 @@ bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const { } bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - return !isPredicated(MI) && get(MI->getOpcode()).isTerminator(); + return !isPredicated(MI) && MI->isTerminator(); } bool PTXInstrInfo:: @@ -184,15 +184,13 @@ AnalyzeBranch(MachineBasicBlock &MBB, if (MBB.empty()) return true; - MachineBasicBlock::const_iterator iter = MBB.end(); + MachineBasicBlock::iterator iter = MBB.end(); const MachineInstr& instLast1 = *--iter; - const MCInstrDesc &desc1 = instLast1.getDesc(); // for special case that MBB has only 1 instruction const bool IsSizeOne = MBB.size() == 1; // if IsSizeOne is true, *--iter and instLast2 are invalid // we put a dummy value in instLast2 and desc2 since they are used const 
MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; - const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc(); DEBUG(dbgs() << "\n"); DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); @@ -207,7 +205,7 @@ } // this block ends with only an unconditional branch - if (desc1.isUnconditionalBranch() && + if (instLast1.isUnconditionalBranch() && // when IsSizeOne is true, it "absorbs" the evaluation of instLast2 (IsSizeOne || !IsAnyKindOfBranch(instLast2))) { DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n"); @@ -217,7 +215,7 @@ // this block ends with a conditional branch and // it falls through to a successor block - if (desc1.isConditionalBranch() && + if (instLast1.isConditionalBranch() && IsAnySuccessorAlsoLayoutSuccessor(MBB)) { DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n"); TBB = GetBranchTarget(instLast1); @@ -233,8 +231,8 @@ // this block ends with a conditional branch // followed by an unconditional branch - if (desc2.isConditionalBranch() && - desc1.isUnconditionalBranch()) { + if (instLast2.isConditionalBranch() && + instLast1.isUnconditionalBranch()) { DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n"); TBB = GetBranchTarget(instLast2); FBB = GetBranchTarget(instLast1); @@ -341,8 +339,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { } bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { - const MCInstrDesc &desc = inst.getDesc(); - return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); + return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch(); } bool PTXInstrInfo:: diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index bcd5bcf..19a862f 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -825,17 +825,17 @@ let hasSideEffects = 1 in { ///===- Parameter Passing Pseudo-Instructions -----------------------------===// def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b), - "mov.pred\t$a, %param$b", []>; + "mov.pred\t$a, %arg$b", []>; def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b), - "mov.b16\t$a, %param$b", []>; + "mov.b16\t$a, %arg$b", []>; def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b), - "mov.b32\t$a, %param$b", []>; + "mov.b32\t$a, %arg$b", []>; def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b), - "mov.b64\t$a, %param$b", []>; + "mov.b64\t$a, %arg$b", []>; def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b), - "mov.f32\t$a, %param$b", []>; + "mov.f32\t$a, %arg$b", []>; def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b), - "mov.f64\t$a, %param$b", []>; + "mov.f64\t$a, %arg$b", []>; def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>; def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>; diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index b33a273..26ec623 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -22,9 +22,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + // NOTE: PTXMFInfoExtract must run after register allocation!
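Moving PTXFPRoundingModePass earlier, and PTXMFInfoExtract in the hunk below, out of namespace llvm and into anonymous namespaces follows the usual LLVM convention for file-private pass classes: the names get internal linkage, so identically named classes in other translation units cannot collide at link time. The shape of the idiom:

// In a .cpp file: the anonymous namespace gives LocalPass internal
// linkage, so an identically named class elsewhere is a different,
// unrelated type.
namespace {
  struct LocalPass {
    const char *getPassName() const { return "example local pass"; }
  };
} // end anonymous namespace

bool runExample() {
  LocalPass P;
  return P.getPassName() != 0;
}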
-namespace llvm { +namespace { /// PTXMFInfoExtract - PTX specific code to extract PTX machine /// function information for PTXAsmPrinter /// @@ -42,7 +44,7 @@ namespace llvm { return "PTX Machine Function Info Extractor"; } }; // class PTXMFInfoExtract -} // namespace llvm +} // end anonymous namespace using namespace llvm; @@ -56,7 +58,22 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); const TargetRegisterClass *TRC = MRI.getRegClass(Reg); - MFI->addVirtualRegister(TRC, Reg); + unsigned RegType; + if (TRC == PTX::RegPredRegisterClass) + RegType = PTXRegisterType::Pred; + else if (TRC == PTX::RegI16RegisterClass) + RegType = PTXRegisterType::B16; + else if (TRC == PTX::RegI32RegisterClass) + RegType = PTXRegisterType::B32; + else if (TRC == PTX::RegI64RegisterClass) + RegType = PTXRegisterType::B64; + else if (TRC == PTX::RegF32RegisterClass) + RegType = PTXRegisterType::F32; + else if (TRC == PTX::RegF64RegisterClass) + RegType = PTXRegisterType::F64; + else + llvm_unreachable("Unknown register class"); + MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg); } return false; diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 3b985f7..1a2878c 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -35,15 +35,22 @@ private: DenseSet<unsigned> RegArgs; DenseSet<unsigned> RegRets; - typedef std::vector<unsigned> RegisterList; - typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap; - typedef DenseMap<unsigned, std::string> RegisterNameMap; typedef DenseMap<int, std::string> FrameMap; - RegisterMap UsedRegs; - RegisterNameMap RegNames; FrameMap FrameSymbols; + struct RegisterInfo { + unsigned Reg; + unsigned Type; + unsigned Space; + unsigned Offset; + unsigned Encoded; + }; + + typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap; + + RegisterInfoMap RegInfo; + PTXParamManager ParamManager; public: @@ -51,13 +58,7 @@ public: PTXMachineFunctionInfo(MachineFunction &MF) : IsKernel(false) { - UsedRegs[PTX::RegPredRegisterClass] = RegisterList(); - UsedRegs[PTX::RegI16RegisterClass] = RegisterList(); - UsedRegs[PTX::RegI32RegisterClass] = RegisterList(); - UsedRegs[PTX::RegI64RegisterClass] = RegisterList(); - UsedRegs[PTX::RegF32RegisterClass] = RegisterList(); - UsedRegs[PTX::RegF64RegisterClass] = RegisterList(); - } + } /// getParamManager - Returns the PTXParamManager instance for this function.
PTXParamManager& getParamManager() { return ParamManager; } @@ -78,69 +79,106 @@ public: reg_iterator retreg_begin() const { return RegRets.begin(); } reg_iterator retreg_end() const { return RegRets.end(); } + /// addRegister - Adds a virtual register to the set of all used registers + void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) { + if (!RegInfo.count(Reg)) { + RegisterInfo Info; + Info.Reg = Reg; + Info.Type = RegType; + Info.Space = RegSpace; + + // Determine register offset + Info.Offset = 0; + for(RegisterInfoMap::const_iterator i = RegInfo.begin(), + e = RegInfo.end(); i != e; ++i) { + const RegisterInfo& RI = i->second; + if (RI.Space == RegSpace) + if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type) + Info.Offset++; + } + + // Encode the register data into a single register number + Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space; + + RegInfo[Reg] = Info; + + if (RegSpace == PTXRegisterSpace::Argument) + RegArgs.insert(Reg); + else if (RegSpace == PTXRegisterSpace::Return) + RegRets.insert(Reg); + } + } + + /// countRegisters - Returns the number of registers of the given type and + /// space. + unsigned countRegisters(unsigned RegType, unsigned RegSpace) const { + unsigned Count = 0; + for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end(); + i != e; ++i) { + const RegisterInfo& RI = i->second; + if (RI.Type == RegType && RI.Space == RegSpace) + Count++; + } + return Count; + } + + /// getEncodedRegister - Returns the encoded value of the register. + unsigned getEncodedRegister(unsigned Reg) const { + return RegInfo.lookup(Reg).Encoded; + } + /// addRetReg - Adds a register to the set of return-value registers. void addRetReg(unsigned Reg) { if (!RegRets.count(Reg)) { RegRets.insert(Reg); - std::string name; - name = "%ret"; - name += utostr(RegRets.size() - 1); - RegNames[Reg] = name; } } /// addArgReg - Adds a register to the set of function argument registers. void addArgReg(unsigned Reg) { RegArgs.insert(Reg); - std::string name; - name = "%param"; - name += utostr(RegArgs.size() - 1); - RegNames[Reg] = name; - } - - /// addVirtualRegister - Adds a virtual register to the set of all used - /// registers in the function. - void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) { - std::string name; - - // Do not count registers that are argument/return registers. - if (!RegRets.count(Reg) && !RegArgs.count(Reg)) { - UsedRegs[TRC].push_back(Reg); - if (TRC == PTX::RegPredRegisterClass) - name = "%p"; - else if (TRC == PTX::RegI16RegisterClass) - name = "%rh"; - else if (TRC == PTX::RegI32RegisterClass) - name = "%r"; - else if (TRC == PTX::RegI64RegisterClass) - name = "%rd"; - else if (TRC == PTX::RegF32RegisterClass) - name = "%f"; - else if (TRC == PTX::RegF64RegisterClass) - name = "%fd"; - else - llvm_unreachable("Invalid register class"); - - name += utostr(UsedRegs[TRC].size() - 1); - RegNames[Reg] = name; - } } /// getRegisterName - Returns the name of the specified virtual register. This /// name is used during PTX emission. 
- const char *getRegisterName(unsigned Reg) const { - if (RegNames.count(Reg)) - return RegNames.find(Reg)->second.c_str(); + std::string getRegisterName(unsigned Reg) const { + if (RegInfo.count(Reg)) { + const RegisterInfo& RI = RegInfo.lookup(Reg); + std::string Name; + raw_string_ostream NameStr(Name); + decodeRegisterName(NameStr, RI.Encoded); + NameStr.flush(); + return Name; + } else if (Reg == PTX::NoRegister) return "%noreg"; else llvm_unreachable("Register not in register name map"); } - /// getNumRegistersForClass - Returns the number of virtual registers that are - /// used for the specified register class. - unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const { - return UsedRegs.lookup(TRC).size(); + /// getEncodedRegisterName - Returns the name of the encoded register. + std::string getEncodedRegisterName(unsigned EncodedReg) const { + std::string Name; + raw_string_ostream NameStr(Name); + decodeRegisterName(NameStr, EncodedReg); + NameStr.flush(); + return Name; + } + + /// getRegisterType - Returns the type of the specified virtual register. + unsigned getRegisterType(unsigned Reg) const { + if (RegInfo.count(Reg)) + return RegInfo.lookup(Reg).Type; + else + llvm_unreachable("Unknown register"); + } + + /// getOffsetForRegister - Returns the offset of the virtual register + unsigned getOffsetForRegister(unsigned Reg) const { + if (RegInfo.count(Reg)) + return RegInfo.lookup(Reg).Offset; + else + return 0; } /// getFrameSymbol - Returns the symbol name for the given FrameIndex. @@ -148,13 +186,13 @@ public: if (FrameSymbols.count(FrameIndex)) { return FrameSymbols.lookup(FrameIndex).c_str(); } else { - std::string Name = "__local"; - Name += utostr(FrameIndex); + std::string Name = "__local"; + Name += utostr(FrameIndex); // The whole point of caching this name is to ensure the pointer we pass // to any getExternalSymbol() calls will remain valid for the lifetime of // the back-end instance. This is to work around an issue in SelectionDAG // where symbol names are expected to be life-long strings. - FrameSymbols[FrameIndex] = Name; + FrameSymbols[FrameIndex] = Name; return FrameSymbols[FrameIndex].c_str(); } } diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 292ea5e..4efdc27 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -67,30 +67,16 @@ namespace { "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; const char* DataLayout64 = "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; - - // Copied from LLVMTargetMachine.cpp - void printNoVerify(PassManagerBase &PM, const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - } - - void printAndVerify(PassManagerBase &PM, - const char *Banner) { - if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); - - //if (VerifyMachineCode) - // PM.add(createMachineVerifierPass(Banner)); - } } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), DataLayout(is64Bit ? 
DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), @@ -101,16 +87,18 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { + : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { + : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) { diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 19f6c0f..22911f7 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -35,7 +35,7 @@ class PTXTargetMachine : public LLVMTargetMachine { public: PTXTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); @@ -94,7 +94,7 @@ class PTX32TargetMachine : public PTXTargetMachine { public: PTX32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; // class PTX32TargetMachine @@ -103,7 +103,7 @@ class PTX64TargetMachine : public PTXTargetMachine { public: PTX64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; // class PTX64TargetMachine diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt index 2366e45..d9a5da3 100644 --- a/lib/Target/PTX/TargetInfo/CMakeLists.txt +++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMPTXInfo PTXTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMPTXInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMPTXInfo PTXCommonTableGen) diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt index 8e5285a..2cc30c4 100644 --- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PTXInfo parent = PTX required_libraries = MC Support Target add_to_library_groups = PTX - diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 05c1ffd..1b85495 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -27,20 +27,6 @@ add_llvm_target(PowerPCCodeGen PPCSelectionDAGInfo.cpp ) -add_llvm_library_dependencies(LLVMPowerPCCodeGen - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMPowerPCAsmPrinter - LLVMPowerPCDesc - LLVMPowerPCInfo - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt index 1d857e2..a605cc4 100644 --- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt @@
-4,9 +4,4 @@ add_llvm_library(LLVMPowerPCAsmPrinter PPCInstPrinter.cpp ) -add_llvm_library_dependencies(LLVMPowerPCAsmPrinter - LLVMMC - LLVMSupport - ) - add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt index afbb2b1..7c691de 100644 --- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt +++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PowerPCAsmPrinter parent = PowerPC required_libraries = MC Support add_to_library_groups = PowerPC - diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index 5baa988..95fac54 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = PowerPC @@ -28,4 +31,3 @@ name = PowerPCCodeGen parent = PowerPC required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target add_to_library_groups = PowerPC - diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt index c4041db..febf438 100644 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt @@ -6,11 +6,4 @@ add_llvm_library(LLVMPowerPCDesc PPCPredicates.cpp ) -add_llvm_library_dependencies(LLVMPowerPCDesc - LLVMMC - LLVMPowerPCAsmPrinter - LLVMPowerPCInfo - LLVMSupport - ) - add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt index fc2da83..d3a567d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PowerPCDesc parent = PowerPC required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support add_to_library_groups = PowerPC - diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9f2fd6d..34a5774 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -93,6 +93,16 @@ public: // FIXME. return false; } + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME. + assert(0 && "RelaxInstruction() unimplemented"); + return false; + } + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { // FIXME. diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 56f622e..5dc2d3d 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -365,11 +365,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::MFCRpseud: + case PPC::MFCR8pseud: // Transform: %R3 = MFCRpseud %CR7 // Into: %R3 = MFCR ;; cr7 OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(1).getReg())); - TmpInst.setOpcode(PPC::MFCR); + TmpInst.setOpcode(Subtarget.isPPC64() ? 
PPC::MFCR8 : PPC::MFCR); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); OutStreamer.EmitInstruction(TmpInst); return; @@ -441,7 +442,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { Directive = PPC::DIR_970; if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400) Directive = PPC::DIR_7400; - if (Subtarget.isPPC64() && Directive < PPC::DIR_970) + if (Subtarget.isPPC64() && Directive < PPC::DIR_64) Directive = PPC::DIR_64; assert(Directive <= PPC::DIR_64 && "Directive out of range."); diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 4a1f182..9d2f4d0 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || + MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } @@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, if (MO.isReg()) { // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. - assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || + assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && + MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); return getPPCRegisterNumbering(MO.getReg()); } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0b85fea..5c45018 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // epilog blocks. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) { + if (!I->empty() && I->back().isReturn()) { bool FoundIt = false; for (MBBI = I->end(); MBBI != I->begin(); ) { --MBBI; @@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { if (MF.getFunction()->hasFnAttr(Attribute::Naked)) return false; - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || - (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects() || + (MF.getTarget().Options.GuaranteedTailCallOpt && + MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } @@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization - if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR && + if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR && MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); @@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. 
int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (TCSPDelta = FI->getTailCallSPDelta()) < 0) { MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } @@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: doesn't detect whether or not we need to spill vXX, which requires // r0 for now. - if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable. + if (RegInfo->requiresRegisterScavenging(MF)) if (needsFP(MF) || spillsCR(MF)) { const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; @@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Take into account stack space reserved for tail calls. int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { LowerBound = TCSPDelta; } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 3197fc8..ae317af 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -27,7 +27,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { const MCInstrDesc *MCID = DAG->getInstrDesc(SU); if (!MCID) { // This is a PPC pseudo-instruction. - // FIXME: Should something else be done? return; } @@ -62,6 +61,7 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) : TII(tii) { + LastWasBL8_ELF = false; EndDispatchGroup(); } @@ -80,12 +80,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle, bool &isCracked, bool &isLoad, bool &isStore) { - if ((int)Opcode >= 0) { - isFirst = isSingle = isCracked = isLoad = isStore = false; - return PPCII::PPC970_Pseudo; - } - Opcode = ~Opcode; - const MCInstrDesc &MCID = TII.get(Opcode); isLoad = MCID.mayLoad(); @@ -102,29 +96,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, /// isLoadOfStoredAddress - If we have a load from the previously stored pointer /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. bool PPCHazardRecognizer970:: -isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { +isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, + const Value *LoadValue) const { for (unsigned i = 0, e = NumStores; i != e; ++i) { // Handle exact and commuted addresses. - if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) - return true; - if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) + if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) return true; // Okay, we don't have an exact match, if this is an indexed offset, see if // we have overlap (which happens during fp->int conversion for example). - if (StorePtr2[i] == Ptr2) { - if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i])) - if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) { - // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check - // to see if the load and store actually overlap. 
- int StoreOffs = StoreOffset->getZExtValue(); - int LoadOffs = LoadOffset->getZExtValue(); - if (StoreOffs < LoadOffs) { - if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; - } else { - if (int(LoadOffs+LoadSize) > StoreOffs) return true; - } - } + if (StoreValue[i] == LoadValue) { + // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check + // to see if the load and store actually overlap. + if (StoreOffset[i] < LoadOffset) { + if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; + } else { + if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; + } } } return false; @@ -138,13 +126,26 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: getHazardType(SUnit *SU, int Stalls) { assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); - const SDNode *Node = SU->getNode()->getGluedMachineNode(); + MachineInstr *MI = SU->getInstr(); + + if (MI->isDebugValue()) + return NoHazard; + + unsigned Opcode = MI->getOpcode(); + + // If the last instruction was a BL8_ELF, then the NOP must follow it + // directly (this is a strong requirement from the linker due to the ELF ABI). + // We return only Hazard (and not NoopHazard) because if the NOP is necessary + // then it will already be in the instruction stream (it is not always + // necessary; tail calls, for example, do not need it). + if (LastWasBL8_ELF && Opcode != PPC::NOP) + return Hazard; + bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = - GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + GetInstrType(Opcode, isFirst, isSingle, isCracked, isLoad, isStore); if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; - unsigned Opcode = Node->getMachineOpcode(); // We can only issue a PPC970_First/PPC970_Single instruction (such as // crand/mtspr/etc) if this is the first cycle of the dispatch group. @@ -181,55 +182,10 @@ getHazardType(SUnit *SU, int Stalls) { // If this is a load following a store, make sure it's not to the same or // overlapping address.
- if (isLoad && NumStores) { - unsigned LoadSize; - switch (Opcode) { - default: llvm_unreachable("Unknown load!"); - case PPC::LBZ: case PPC::LBZU: - case PPC::LBZX: - case PPC::LBZ8: case PPC::LBZU8: - case PPC::LBZX8: - case PPC::LVEBX: - LoadSize = 1; - break; - case PPC::LHA: case PPC::LHAU: - case PPC::LHAX: - case PPC::LHZ: case PPC::LHZU: - case PPC::LHZX: - case PPC::LVEHX: - case PPC::LHBRX: - case PPC::LHA8: case PPC::LHAU8: - case PPC::LHAX8: - case PPC::LHZ8: case PPC::LHZU8: - case PPC::LHZX8: - LoadSize = 2; - break; - case PPC::LFS: case PPC::LFSU: - case PPC::LFSX: - case PPC::LWZ: case PPC::LWZU: - case PPC::LWZX: - case PPC::LWA: - case PPC::LWAX: - case PPC::LVEWX: - case PPC::LWBRX: - case PPC::LWZ8: - case PPC::LWZX8: - LoadSize = 4; - break; - case PPC::LFD: case PPC::LFDU: - case PPC::LFDX: - case PPC::LD: case PPC::LDU: - case PPC::LDX: - LoadSize = 8; - break; - case PPC::LVX: - case PPC::LVXL: - LoadSize = 16; - break; - } - - if (isLoadOfStoredAddress(LoadSize, - Node->getOperand(0), Node->getOperand(1))) + if (isLoad && NumStores && !MI->memoperands_empty()) { + MachineMemOperand *MO = *MI->memoperands_begin(); + if (isLoadOfStoredAddress(MO->getSize(), + MO->getOffset(), MO->getValue())) return NoopHazard; } @@ -237,66 +193,29 @@ getHazardType(SUnit *SU, int Stalls) { } void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { - const SDNode *Node = SU->getNode()->getGluedMachineNode(); + MachineInstr *MI = SU->getInstr(); + + if (MI->isDebugValue()) + return; + + unsigned Opcode = MI->getOpcode(); + LastWasBL8_ELF = (Opcode == PPC::BL8_ELF); + bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = - GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + GetInstrType(Opcode, isFirst, isSingle, isCracked, isLoad, isStore); if (InstrType == PPCII::PPC970_Pseudo) return; - unsigned Opcode = Node->getMachineOpcode(); // Update structural hazard information. if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; // Track the address stored to. 
- if (isStore) { - unsigned ThisStoreSize; - switch (Opcode) { - default: llvm_unreachable("Unknown store instruction!"); - case PPC::STB: case PPC::STB8: - case PPC::STBU: case PPC::STBU8: - case PPC::STBX: case PPC::STBX8: - case PPC::STVEBX: - ThisStoreSize = 1; - break; - case PPC::STH: case PPC::STH8: - case PPC::STHU: case PPC::STHU8: - case PPC::STHX: case PPC::STHX8: - case PPC::STVEHX: - case PPC::STHBRX: - ThisStoreSize = 2; - break; - case PPC::STFS: - case PPC::STFSU: - case PPC::STFSX: - case PPC::STWX: case PPC::STWX8: - case PPC::STWUX: - case PPC::STW: case PPC::STW8: - case PPC::STWU: - case PPC::STVEWX: - case PPC::STFIWX: - case PPC::STWBRX: - ThisStoreSize = 4; - break; - case PPC::STD_32: - case PPC::STDX_32: - case PPC::STD: - case PPC::STDU: - case PPC::STFD: - case PPC::STFDX: - case PPC::STDX: - case PPC::STDUX: - ThisStoreSize = 8; - break; - case PPC::STVX: - case PPC::STVXL: - ThisStoreSize = 16; - break; - } - - StoreSize[NumStores] = ThisStoreSize; - StorePtr1[NumStores] = Node->getOperand(1); - StorePtr2[NumStores] = Node->getOperand(2); + if (isStore && NumStores < 4 && !MI->memoperands_empty()) { + MachineMemOperand *MO = *MI->memoperands_begin(); + StoreSize[NumStores] = MO->getSize(); + StoreOffset[NumStores] = MO->getOffset(); + StoreValue[NumStores] = MO->getValue(); ++NumStores; } @@ -319,3 +238,9 @@ void PPCHazardRecognizer970::AdvanceCycle() { if (NumIssued == 5) EndDispatchGroup(); } + +void PPCHazardRecognizer970::Reset() { + LastWasBL8_ELF = false; + EndDispatchGroup(); +} + diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 32fac91..95d0d64 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -49,14 +49,18 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. bool HasCTRSet; + // Was the last instruction issued a BL8_ELF + bool LastWasBL8_ELF; + // StoredPtr - Keep track of the address of any store. If we see a load from // the same address (or one that aliases it), disallow the store. We can have // up to four stores in one dispatch group, hence we track up to 4. // // This is null if we haven't seen a store yet. We keep track of both // operands of the store here, since we support [r+r] and [r+i] addressing. - SDValue StorePtr1[4], StorePtr2[4]; - unsigned StoreSize[4]; + const Value *StoreValue[4]; + int64_t StoreOffset[4]; + uint64_t StoreSize[4]; unsigned NumStores; public: @@ -64,6 +68,7 @@ public: virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); + virtual void Reset(); private: /// EndDispatchGroup - Called when we are finishing a new dispatch group. @@ -76,8 +81,8 @@ private: bool &isFirst, bool &isSingle,bool &isCracked, bool &isLoad, bool &isStore); - bool isLoadOfStoredAddress(unsigned LoadSize, - SDValue Ptr1, SDValue Ptr2) const; + bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, + const Value *LoadValue) const; }; } // end namespace llvm diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3dee406..4a509a3 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -210,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Find all return blocks, outputting a restore in each epilog. 
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (!BB->empty() && BB->back().getDesc().isReturn()) { + if (!BB->empty() && BB->back().isReturn()) { IP = BB->end(); --IP; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = IP; - while (I2 != BB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != BB->begin() && (--I2)->isTerminator()) IP = I2; // Emit: MTVRSAVE InVRSave @@ -1066,7 +1066,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Target = N->getOperand(1); unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; - Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target, + Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, Chain), 0); return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 36d5c41..f3a3d17 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -103,6 +103,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); + // We do not currently implement these libm ops for PowerPC. + setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); + setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); + setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); + setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); + // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); @@ -146,9 +153,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); @@ -332,7 +343,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle @@ -1667,7 +1680,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments.
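All of the GuaranteedTailCallOpt rewrites in these lowering hunks follow one pattern: codegen flags that used to be file-scope cl::opt globals are now read from the TargetOptions hanging off the TargetMachine. A condensed sketch of the consumer side, assuming a MachineFunction whose target is already configured; the helper name is hypothetical:

#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

// Mirrors the isImmutable computation above: argument stack slots can only
// be overwritten when guaranteed tail calls are enabled and the callee uses
// the fastcc convention.
static bool argSlotsMayBeOverwritten(const llvm::MachineFunction &MF,
                                     llvm::CallingConv::ID CC) {
  const llvm::TargetOptions &Opts = MF.getTarget().Options;
  return Opts.GuaranteedTailCallOpt && CC == llvm::CallingConv::Fast;
}

The per-machine lookup is what lets two TargetMachines in one process run with different tail-call policies, something the old globals could not express.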
@@ -1857,7 +1871,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); @@ -2263,9 +2278,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. - if (CC==CallingConv::Fast && GuaranteedTailCallOpt) { - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> getStackAlignment(); + if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ + unsigned TargetAlign = DAG.getMachineFunction().getTarget(). + getFrameLowering()->getStackAlignment(); unsigned AlignMask = TargetAlign-1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; } @@ -2299,7 +2314,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - if (!GuaranteedTailCallOpt) + if (!getTargetMachine().Options.GuaranteedTailCallOpt) return false; // Variable argument functions are not supported. @@ -2752,7 +2767,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. int BytesCalleePops = - (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0; + (CallConv == CallingConv::Fast && + getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; if (InFlag.getNode()) Ops.push_back(InFlag); @@ -2868,7 +2884,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // and restoring the caller's stack pointer in this function's epilog. This is // done because, by tail calling, the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage @@ -3075,7 +3092,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // and restoring the caller's stack pointer in this function's epilog. This is // done because, by tail calling, the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); unsigned nAltivecParamsAtEnd = 0; @@ -5754,7 +5772,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && + bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects()) && MFI->getStackSize() && !MF.getFunction()->hasFnAttr(Attribute::Naked); unsigned FrameReg = isPPC64 ? (is31 ?
PPC::X31 : PPC::X1) : diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index e88ad37..cdbc264 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -223,6 +223,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; +// 64-bit CR instructions +def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), + "mtcrf $FXM, $rS", BrMCRX>, + PPC970_MicroCode, PPC970_Unit_CRU; + +def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), + "", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; + +def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), + "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -469,6 +481,12 @@ def RLDICR : MDForm_1<30, 1, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), "rldicr $rA, $rS, $SH, $ME", IntRotateD, []>, isPPC64; + +def RLWINM8 : MForm_2<21, + (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, + []>; + } // End FXU Operations. diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index b9a6297..6d16f1d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,8 +33,8 @@ #include "PPCGenInstrInfo.inc" namespace llvm { -extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. -extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. +extern cl::opt<bool> DisablePPC32RS; +extern cl::opt<bool> DisablePPC64RS; } using namespace llvm; @@ -48,25 +48,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { - // Should use subtarget info to pick the right hazard recognizer. For - // now, always return a PPC970 recognizer. - const TargetInstrInfo *TII = TM->getInstrInfo(); - (void)TII; - assert(TII && "No InstrInfo?"); - unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); if (Directive == PPC::DIR_440) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new PPCHazardRecognizer440(II, DAG); } - else { - // Disable the hazard recognizer for now, as it doesn't support - // bottom-up scheduling. - //return new PPCHazardRecognizer970(*TII); - return new ScheduleHazardRecognizer(); - } + + return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); } +/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer +/// to use for this target when scheduling the DAG. +ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( + const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + + // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440) { + const TargetInstrInfo *TII = TM.getInstrInfo(); + assert(TII && "No InstrInfo?"); + + return new PPCHazardRecognizer970(*TII); + } + + return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); +} unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { @@ -338,6 +345,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } +// This function returns true if a CR spill is necessary and false otherwise. bool PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, @@ -369,7 +377,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); } else { // FIXME: this spills LR immediately to memory in one step. To do this, - // we use R11, which we know cannot be used in the prolog/epilog. This is + // we use X11, which we know cannot be used in the prolog/epilog. This is // a hack. NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) @@ -388,9 +396,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { - if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - // FIXME (64-bit): Enable + if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) .addReg(SrcReg, getKillRegState(isKill)), @@ -403,11 +410,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // We hack this on Darwin by reserving R2. It's probably broken on Linux // at the moment. + bool is64Bit = TM.getSubtargetImpl()->isPPC64(); // We need to store the CR in the low 4-bits of the saved value. First, // issue a MFCR to save all of the CRBits. unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg) + (is64Bit ? PPC::X2 : PPC::R2) : + (is64Bit ? PPC::X0 : PPC::R0); + NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : + PPC::MFCRpseud), ScratchReg) .addReg(SrcReg, getKillRegState(isKill))); // If the saved register wasn't CR0, shift the bits left so that they are @@ -415,12 +425,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, if (SrcReg != PPC::CR0) { unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : + PPC::RLWINM), ScratchReg) .addReg(ScratchReg).addImm(ShiftBits) .addImm(0).addImm(31)); } - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? 
+ PPC::STW8 : PPC::STW)) .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); @@ -504,7 +516,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, NewMIs.back()->addMemOperand(MF, MMO); } -void +bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, @@ -524,8 +536,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, FrameIdx)); } else { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), - PPC::R11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11)); + PPC::X11), FrameIdx)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11)); } } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), @@ -534,28 +546,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); + if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg) + , FrameIdx)); + return true; + } else { + // FIXME: We need a scratch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); + + // If the reloaded register isn't CR0, shift the bits right so that they are + // in the right CR's slot. + if (DestReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + // rlwinm r11, r11, 32-ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); + } + + NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
+ PPC::MTCRF8 : PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } - - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) - .addReg(ScratchReg)); } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { unsigned Reg = 0; @@ -600,6 +621,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, } else { llvm_unreachable("Unknown regclass!"); } + + return false; } void @@ -612,7 +635,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setSpillsCR(); + } for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 90bacc9..e90f8cb 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; - void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; @@ -88,6 +88,9 @@ public: ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const; + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 17f63e0..d4c9d10 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -349,7 +349,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!NoExcessFPPrecision">; +def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; def IsBookE : Predicate<"PPCSubTarget.isBookE()">; @@ -399,7 +399,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. -def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F), +let mayStore = 1 in +def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F), + "", []>; + +// RESTORE_CR - Indicate that we're restoring the CR register (previously +// spilled), so we'll need to scavenge a register for it. 
+let mayLoad = 1 in +def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), "", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { @@ -1091,7 +1098,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; -def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS), +def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 3ba9260..27f7f4a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,15 +46,14 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -// FIXME (64-bit): Eventually enable by default. namespace llvm { -cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", +cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", cl::init(false), - cl::desc("Enable PPC32 register scavenger"), + cl::desc("Disable PPC32 register scavenger"), cl::Hidden); -cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", +cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", cl::init(false), - cl::desc("Enable PPC64 register scavenger"), + cl::desc("Disable PPC64 register scavenger"), cl::Hidden); } @@ -63,8 +62,8 @@ using namespace llvm; // FIXME (64-bit): Should be inlined. bool PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((EnablePPC32RS && !Subtarget.isPPC64()) || - (EnablePPC64RS && Subtarget.isPPC64())); + return ((!DisablePPC32RS && !Subtarget.isPPC64()) || + (!DisablePPC64RS && Subtarget.isPPC64())); } PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, @@ -120,10 +119,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::LR, 0 }; @@ -149,10 +144,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - 0 }; // 64-bit Darwin calling convention. 
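The SPILL_CR/RESTORE_CR lowering introduced above hinges on one piece of arithmetic: mfcr produces all eight 4-bit CR fields in a single 32-bit word, and an rlwinm rotate moves the field of interest into (or back out of) CR0's slot around the 32-bit store/load. A standalone sketch of that arithmetic, assuming IBM bit numbering in which CRn occupies the nth nibble from the most significant end:

#include <cstdint>

// Stand-in for rlwinm with MB=0 and ME=31: a plain 32-bit rotate left.
static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N == 0 ? V : (V << N) | (V >> (32 - N));
}

// Spill side: rotate left by 4*n so CRn's nibble lands in CR0's slot (the
// most significant nibble) before the word is stored.
static uint32_t crFieldToCR0Slot(uint32_t MFCRWord, unsigned CRNum) {
  return rotl32(MFCRWord, CRNum * 4);
}

// Restore side: the inverse rotate (left by 32 - 4*n, the "32-ShiftBits"
// rlwinm in lowerCRRestore) puts the reloaded nibble back into CRn's slot
// before mtcrf rewrites the field.
static uint32_t cr0SlotToCRField(uint32_t Word, unsigned CRNum) {
  return rotl32(Word, 32 - CRNum * 4);
}

For CR7, for example, the spill rotates by 28 and the restore by 4; the two rotations compose to the identity, and for CR0 both are no-ops.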
@@ -174,10 +165,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::LR8, 0 }; @@ -203,10 +190,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - 0 }; @@ -247,9 +230,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R13); Reserved.set(PPC::R31); - if (!requiresRegisterScavenging(MF)) - Reserved.set(PPC::R0); // FIXME (64-bit): Remove - Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); @@ -259,7 +239,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // Reserve X2 on Darwin to hack around the problem of save/restore of CR // when the stack frame is too big to address directly; we need two regs. // This is a hack. if (Subtarget.isDarwinABI()) { @@ -291,6 +271,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case PPC::F4RCRegClassID: case PPC::VRRCRegClassID: return 32 - DefaultSafety; + case PPC::CRRCRegClassID: + return 8 - DefaultSafety; } } @@ -301,7 +283,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, void PPCRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + I->getOpcode() == PPC::ADJCALLSTACKUP) { // Add (actually subtract) back the amount the callee popped on return. if (int CalleeAmt = I->getOperand(1).getImm()) { bool is64Bit = Subtarget.isPPC64(); @@ -476,28 +459,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const { // Get the instruction. - MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI> + MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; - unsigned Reg = findScratchRegister(II, RS, RC, SPAdj); - unsigned SrcReg = MI.getOperand(0).getReg(); + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + bool LP64 = Subtarget.isPPC64(); + unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : + (LP64 ? PPC::X0 : PPC::R0); + unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg. - BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg) + BuildMI(MBB, II, dl, TII.get(LP64 ?
PPC::MFCR8pseud : PPC::MFCRpseud), Reg) .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. if (SrcReg != PPC::CR0) // rlwinm rA, rA, ShiftBits, 0, 31. - BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) .addReg(Reg, RegState::Kill) .addImm(getPPCRegisterNumbering(SrcReg) * 4) .addImm(0) @@ -511,6 +498,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, MBB.erase(II); } +void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + bool LP64 = Subtarget.isPPC64(); + unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : + (LP64 ? PPC::X0 : PPC::R0); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_CR does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ), + Reg), FrameIndex); + + // If the reloaded register isn't CR0, shift the bits right so that they are + // in the right CR's slot. + if (DestReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + // rlwinm r11, r11, 32-ShiftBits, 0, 31. + BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) + .addReg(Reg).addImm(32-ShiftBits).addImm(0) + .addImm(31); + } + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg) + .addReg(Reg); + + // Discard the pseudo instruction. + MBB.erase(II); +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { @@ -556,16 +585,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - // Special case for pseudo-op SPILL_CR. - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default. + // Special case for pseudo-ops SPILL_CR and RESTORE_CR. + if (requiresRegisterScavenging(MF)) { if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex, SPAdj, RS); + return; } + } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). + + bool is64Bit = Subtarget.isPPC64(); MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? - PPC::R31 : PPC::R1, + (is64Bit ? PPC::X31 : PPC::R31) : + (is64Bit ? PPC::X1 : PPC::R1), false); // Figure out if the offset in the instruction is shifted right two bits. This @@ -611,19 +647,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - // FIXME: figure out what SPAdj is doing here. - // FIXME (64-bit): Use "findScratchRegister". 
unsigned SReg; - if (requiresRegisterScavenging(MF)) - SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj); - else - SReg = PPC::R0; + if (requiresRegisterScavenging(MF)) { + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); + } else + SReg = is64Bit ? PPC::X0 : PPC::R0; // Insert a set of rA with the full offset value before the ld, st, or add - BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) .addImm(Offset >> 16); - BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) .addReg(SReg, RegState::Kill) .addImm(Offset); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index f70a594..faf690f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -57,6 +57,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 8acf75c..baa0eb5 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" +#include "PPCRegisterInfo.h" #include "PPC.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" @@ -140,3 +141,22 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage() || isDecl; } + +bool PPCSubtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + if (DarwinDirective == PPC::DIR_440) + return false; + + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + + if (isPPC64()) + CriticalPathRCs.push_back(&PPC::G8RCRegClass); + else + CriticalPathRCs.push_back(&PPC::GPRCRegClass); + + return OptLevel >= CodeGenOpt::Default; +} + diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index d2b853d..62b2424 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -148,6 +148,10 @@ public: bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } + /// enablePostRAScheduler - True at 'More' optimization. 
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index de8fca0..8e71c46 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -28,10 +28,11 @@ extern "C" void LLVMInitializePowerPCTarget() { PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), @@ -45,17 +46,19 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } @@ -81,7 +84,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, if (Subtarget.isPPC64()) // Temporary workaround for the inability of PPC64 JIT to handle jump // tables. - DisableJumpTables = true; + Options.DisableJumpTables = true; // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? 
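[Editor's note: a hedged sketch, not code from this commit, of how a post-RA scheduling pass typically consults the subtarget hook defined above; MF and OptLevel are assumed to be in scope, and the element type of CriticalPathRCs follows TargetSubtargetInfo::RegClassVector.]

const PPCSubtarget &ST = MF.getTarget().getSubtarget<PPCSubtarget>();
TargetSubtargetInfo::AntiDepBreakMode ADBMode;
SmallVector<const TargetRegisterClass*, 2> CriticalPathRCs;
if (ST.enablePostRAScheduler(OptLevel, ADBMode, CriticalPathRCs)) {
  // ADBMode comes back as ANTIDEP_CRITICAL and CriticalPathRCs holds G8RC
  // (64-bit) or GPRC (32-bit); the PPC440 direction opts out entirely.
}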
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 03b27c6..0427876 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -41,7 +41,7 @@ class PPCTargetMachine : public LLVMTargetMachine { public: PPCTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); @@ -79,7 +79,7 @@ public: class PPC32TargetMachine : public PPCTargetMachine { public: PPC32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -89,7 +89,7 @@ public: class PPC64TargetMachine : public PPCTargetMachine { public: PPC64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt index f63111f..fdb8a62 100644 --- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt +++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMPowerPCInfo PowerPCTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMPowerPCInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen) diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt index f51b417..f77d85b 100644 --- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = PowerPCInfo parent = PowerPC required_libraries = MC Support Target add_to_library_groups = PowerPC - diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 9687951..56ee7c2 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -22,17 +22,5 @@ add_llvm_target(SparcCodeGen SparcSelectionDAGInfo.cpp ) -add_llvm_library_dependencies(LLVMSparcCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSparcDesc - LLVMSparcInfo - LLVMSupport - LLVMTarget - ) - add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index dab35e5..9295408 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -100,7 +100,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) - if (I->getDesc().hasDelaySlot()) { + if (I->hasDelaySlot()) { MachineBasicBlock::iterator D = MBB.end(); MachineBasicBlock::iterator J = I; @@ -149,7 +149,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, } //Call's delay filler can def some of call's uses. 
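// Editor's aside (hedged, not part of the patch): the getDesc() removals in
// this file are a mechanical migration to MachineInstr's forwarding queries,
// which are thin wrappers along the lines of:
//   bool MachineInstr::isCall() const { return getDesc().isCall(); }
// so I->hasDelaySlot(), slot->isCall(), candidate->mayLoad(), etc. behave
// exactly as the getDesc() forms did.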
- if (slot->getDesc().isCall()) + if (slot->isCall()) insertCallUses(slot, RegUses); else insertDefsUses(slot, RegDefs, RegUses); @@ -170,7 +170,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB, if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isLabel() - || I->getDesc().hasDelaySlot() + || I->hasDelaySlot() || isDelayFiller(MBB, I)) break; @@ -194,13 +194,13 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate, if (candidate->isImplicitDef() || candidate->isKill()) return true; - if (candidate->getDesc().mayLoad()) { + if (candidate->mayLoad()) { sawLoad = true; if (sawStore) return true; } - if (candidate->getDesc().mayStore()) { + if (candidate->mayStore()) { if (sawStore) return true; sawStore = true; @@ -298,13 +298,13 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB, return false; if (candidate->getOpcode() == SP::UNIMP) return true; - const MCInstrDesc &prevdesc = (--candidate)->getDesc(); - return prevdesc.hasDelaySlot(); + --candidate; + return candidate->hasDelaySlot(); } bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) { - if (!I->getDesc().isCall()) + if (!I->isCall()) return false; unsigned structSizeOpNum = 0; diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt index 38c797f..fe20d2f 100644 --- a/lib/Target/Sparc/LLVMBuild.txt +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = Sparc @@ -27,4 +30,3 @@ name = SparcCodeGen parent = Sparc required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target add_to_library_groups = Sparc - diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt index d3bdf0b..9d4db4d 100644 --- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt @@ -3,10 +3,4 @@ add_llvm_library(LLVMSparcDesc SparcMCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMSparcDesc - LLVMMC - LLVMSparcInfo - LLVMSupport - ) - add_dependencies(LLVMSparcDesc SparcCommonTableGen) diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt index a339cec..97f8f16 100644 --- a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = SparcDesc parent = Sparc required_libraries = MC SparcInfo Support add_to_library_groups = Sparc - diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index deb39d9..7548bbf 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -236,9 +236,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // Check if the last terminator is an unconditional branch. 
MachineBasicBlock::const_iterator I = Pred->end(); - while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + while (I != Pred->begin() && !(--I)->isTerminator()) ; // Noop - return I == Pred->end() || !I->getDesc().isBarrier(); + return I == Pred->end() || !I->isBarrier(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 25104d1..3608d3b 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -763,7 +763,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::FMA , MVT::f32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ , MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Expand); diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 7a6bf50..5290d42 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -133,7 +133,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, break; //Terminator is not a branch - if (!I->getDesc().isBranch()) + if (!I->isBranch()) return true; //Handle Unconditional branches @@ -195,7 +195,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode); BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA)) .addMBB(TargetBB); - MBB.addSuccessor(TargetBB); + OldInst->eraseFromParent(); UnCondBrIter->eraseFromParent(); diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 7dff799..8e16fd7 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -26,10 +26,11 @@ extern "C" void LLVMInitializeSparcTarget() { /// SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), @@ -52,16 +53,20 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){ SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, Reloc::Model RM, + StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { + : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, Reloc::Model RM, + StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { + : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 63bfa5d..cedc1e3 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -34,9 +34,9 @@ class 
SparcTargetMachine : public LLVMTargetMachine { SparcFrameLowering FrameLowering; public: SparcTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool is64bit); + CodeGenOpt::Level OL, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -65,6 +65,7 @@ class SparcV8TargetMachine : public SparcTargetMachine { public: SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -75,6 +76,7 @@ class SparcV9TargetMachine : public SparcTargetMachine { public: SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt index a076023..b0d031e 100644 --- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt +++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMSparcInfo SparcTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMSparcInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMSparcInfo SparcCommonTableGen) diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt index 81c9032..b5c320f 100644 --- a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = SparcInfo parent = Sparc required_libraries = MC Support Target add_to_library_groups = Sparc - diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index d52ecb3..440f9ad 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -13,7 +13,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" @@ -73,23 +72,6 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); } -int -TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const { - if (!ItinData || ItinData->isEmpty()) - return -1; - - if (!DefNode->isMachineOpcode()) - return -1; - - unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); - if (!UseNode->isMachineOpcode()) - return ItinData->getOperandCycle(DefClass, DefIdx); - unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); - return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); -} - int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -99,17 +81,6 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return ItinData->getStageLatency(MI->getDesc().getSchedClass()); } -int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, - SDNode *N) const { - if (!ItinData || ItinData->isEmpty()) - return 1; - - if (!N->isMachineOpcode()) - return 1; - - return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); -} - bool 
TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx) const { @@ -129,19 +100,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, } -bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isTerminator()) return false; - - // Conditional branch is a special case. - if (MCID.isBranch() && !MCID.isBarrier()) - return true; - if (!MCID.isPredicable()) - return true; - return !isPredicated(MI); -} - - /// Measure the specified inline asm to determine an approximation of its /// length. /// Comments (which run till the next SeparatorString or newline) do not diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index aa2e014..768facb 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -22,15 +22,96 @@ char TargetLibraryInfo::ID = 0; const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { - "memset", + "acos", + "acosl", + "acosf", + "asin", + "asinl", + "asinf", + "atan", + "atanl", + "atanf", + "atan2", + "atan2l", + "atan2f", + "ceil", + "ceill", + "ceilf", + "copysign", + "copysignf", + "copysignl", + "cos", + "cosl", + "cosf", + "cosh", + "coshl", + "coshf", + "exp", + "expl", + "expf", + "exp2", + "exp2l", + "exp2f", + "expm1", + "expm1l", + "expm1f", + "fabs", + "fabsl", + "fabsf", + "floor", + "floorl", + "floorf", + "fiprintf", + "fmod", + "fmodl", + "fmodf", + "fputs", + "fwrite", + "iprintf", + "log", + "logl", + "logf", + "log2", + "log2l", + "log2f", + "log10", + "log10l", + "log10f", + "log1p", + "log1pl", + "log1pf", "memcpy", "memmove", + "memset", "memset_pattern16", - "iprintf", + "nearbyint", + "nearbyintf", + "nearbyintl", + "pow", + "powf", + "powl", + "rint", + "rintf", + "rintl", + "sin", + "sinl", + "sinf", + "sinh", + "sinhl", + "sinhf", "siprintf", - "fiprintf", - "fwrite", - "fputs" + "sqrt", + "sqrtl", + "sqrtf", + "tan", + "tanl", + "tanf", + "tanh", + "tanhl", + "tanhf", + "trunc", + "truncf", + "truncl" }; /// initialize - Initialize the set of available library functions based on the diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 56b7b69..fc8b67b 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -48,7 +48,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx, TargetLoweringObjectFile::~TargetLoweringObjectFile() { } -static bool isSuitableForBSS(const GlobalVariable *GV) { +static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) { const Constant *C = GV->getInitializer(); // Must have zero initializer. @@ -133,7 +133,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // Handle thread-local data first. if (GVar->isThreadLocal()) { - if (isSuitableForBSS(GVar)) + if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) return SectionKind::getThreadBSS(); return SectionKind::getThreadData(); } @@ -143,7 +143,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, return SectionKind::getCommon(); // Variable can be easily put to BSS section.
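// Editor's note, a hedged example (not part of the patch): NoZerosInBSS
// corresponds to -nozero-initialized-in-bss and now travels on TM.Options.
// With it set, a zero-initialized global such as
//   @buf = global [1024 x i8] zeroinitializer
// is deliberately kept out of .bss and emitted as initialized data.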
- if (isSuitableForBSS(GVar)) { + if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) { if (GVar->hasLocalLinkage()) return SectionKind::getBSSLocal(); else if (GVar->hasExternalLinkage()) diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 805e16e..fb7bbbb 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -24,153 +22,11 @@ using namespace llvm; // namespace llvm { - bool LessPreciseFPMADOption; - bool PrintMachineCode; - bool NoFramePointerElim; - bool NoFramePointerElimNonLeaf; - bool NoExcessFPPrecision; - bool UnsafeFPMath; - bool NoInfsFPMath; - bool NoNaNsFPMath; - bool HonorSignDependentRoundingFPMathOption; - bool UseSoftFloat; - FloatABI::ABIType FloatABIType; - bool NoImplicitFloat; - bool NoZerosInBSS; - bool JITExceptionHandling; - bool JITEmitDebugInfo; - bool JITEmitDebugInfoToDisk; - bool GuaranteedTailCallOpt; - unsigned StackAlignmentOverride; - bool RealignStack; - bool DisableJumpTables; bool StrongPHIElim; bool HasDivModLibcall; bool AsmVerbosityDefault(false); - bool EnableSegmentedStacks; } -static cl::opt<bool, true> -PrintCode("print-machineinstrs", - cl::desc("Print generated machine code"), - cl::location(PrintMachineCode), cl::init(false)); -static cl::opt<bool, true> -DisableFPElim("disable-fp-elim", - cl::desc("Disable frame pointer elimination optimization"), - cl::location(NoFramePointerElim), - cl::init(false)); -static cl::opt<bool, true> -DisableFPElimNonLeaf("disable-non-leaf-fp-elim", - cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), - cl::location(NoFramePointerElimNonLeaf), - cl::init(false)); -static cl::opt<bool, true> -DisableExcessPrecision("disable-excess-fp-precision", - cl::desc("Disable optimizations that may increase FP precision"), - cl::location(NoExcessFPPrecision), - cl::init(false)); -static cl::opt<bool, true> -EnableFPMAD("enable-fp-mad", - cl::desc("Enable less precise MAD instructions to be generated"), - cl::location(LessPreciseFPMADOption), - cl::init(false)); -static cl::opt<bool, true> -EnableUnsafeFPMath("enable-unsafe-fp-math", - cl::desc("Enable optimizations that may decrease FP precision"), - cl::location(UnsafeFPMath), - cl::init(false)); -static cl::opt<bool, true> -EnableNoInfsFPMath("enable-no-infs-fp-math", - cl::desc("Enable FP math optimizations that assume no +-Infs"), - cl::location(NoInfsFPMath), - cl::init(false)); -static cl::opt<bool, true> -EnableNoNaNsFPMath("enable-no-nans-fp-math", - cl::desc("Enable FP math optimizations that assume no NaNs"), - cl::location(NoNaNsFPMath), - cl::init(false)); -static cl::opt<bool, true> -EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", - cl::Hidden, - cl::desc("Force codegen to assume rounding mode can change dynamically"), - cl::location(HonorSignDependentRoundingFPMathOption), - cl::init(false)); -static cl::opt<bool, true> -GenerateSoftFloatCalls("soft-float", - cl::desc("Generate software floating point library calls"), - cl::location(UseSoftFloat), - cl::init(false)); -static cl::opt<llvm::FloatABI::ABIType, true> -FloatABIForCalls("float-abi", - cl::desc("Choose float ABI type"), - cl::location(FloatABIType), - cl::init(FloatABI::Default), - cl::values( - 
clEnumValN(FloatABI::Default, "default", - "Target default float ABI type"), - clEnumValN(FloatABI::Soft, "soft", - "Soft float ABI (implied by -soft-float)"), - clEnumValN(FloatABI::Hard, "hard", - "Hard float ABI (uses FP registers)"), - clEnumValEnd)); -static cl::opt<bool, true> -DontPlaceZerosInBSS("nozero-initialized-in-bss", - cl::desc("Don't place zero-initialized symbols into bss section"), - cl::location(NoZerosInBSS), - cl::init(false)); -static cl::opt<bool, true> -EnableJITExceptionHandling("jit-enable-eh", - cl::desc("Emit exception handling information"), - cl::location(JITExceptionHandling), - cl::init(false)); -// In debug builds, make this default to true. -#ifdef NDEBUG -#define EMIT_DEBUG false -#else -#define EMIT_DEBUG true -#endif -static cl::opt<bool, true> -EmitJitDebugInfo("jit-emit-debug", - cl::desc("Emit debug information to debugger"), - cl::location(JITEmitDebugInfo), - cl::init(EMIT_DEBUG)); -#undef EMIT_DEBUG -static cl::opt<bool, true> -EmitJitDebugInfoToDisk("jit-emit-debug-to-disk", - cl::Hidden, - cl::desc("Emit debug info objfiles to disk"), - cl::location(JITEmitDebugInfoToDisk), - cl::init(false)); - -static cl::opt<bool, true> -EnableGuaranteedTailCallOpt("tailcallopt", - cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."), - cl::location(GuaranteedTailCallOpt), - cl::init(false)); -static cl::opt<unsigned, true> -OverrideStackAlignment("stack-alignment", - cl::desc("Override default stack alignment"), - cl::location(StackAlignmentOverride), - cl::init(0)); -static cl::opt<bool, true> -EnableRealignStack("realign-stack", - cl::desc("Realign stack if needed"), - cl::location(RealignStack), - cl::init(true)); -static cl::opt<bool, true> -DisableSwitchTables(cl::Hidden, "disable-jump-tables", - cl::desc("Do not generate jump tables."), - cl::location(DisableJumpTables), - cl::init(false)); -static cl::opt<bool, true> -EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", - cl::desc("Use strong PHI elimination."), - cl::location(StrongPHIElim), - cl::init(false)); -static cl::opt<std::string> -TrapFuncName("trap-func", cl::Hidden, - cl::desc("Emit a call to trap function rather than a trap instruction"), - cl::init("")); static cl::opt<bool> DataSections("fdata-sections", cl::desc("Emit data into separate sections"), @@ -179,18 +35,14 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); -static cl::opt<bool, true> -SegmentedStacks("segmented-stacks", - cl::desc("Use segmented stacks if possible."), - cl::location(EnableSegmentedStacks), - cl::init(false)); //--------------------------------------------------------------------------- // TargetMachine Class // TargetMachine::TargetMachine(const Target &T, - StringRef TT, StringRef CPU, StringRef FS) + StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options) : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), CodeGenInfo(0), AsmInfo(0), MCRelaxAll(false), @@ -198,11 +50,8 @@ TargetMachine::TargetMachine(const Target &T, MCSaveTempLabels(false), MCUseLoc(true), MCUseCFI(true), - MCUseDwarfDirectory(false) { - // Typically it will be subtargets that will adjust FloatABIType from Default - // to Soft or Hard. 
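// Editor's aside (hedged): with the cl::opt globals above deleted, the
// Default-to-Soft adjustment removed below has to happen wherever a
// TargetOptions is populated, roughly:
//   if (Opts.UseSoftFloat) Opts.FloatABIType = FloatABI::Soft;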
- if (UseSoftFloat) - FloatABIType = FloatABI::Soft; + MCUseDwarfDirectory(false), + Options(Options) { } TargetMachine::~TargetMachine() { @@ -258,36 +107,3 @@ void TargetMachine::setDataSections(bool V) { DataSections = V; } -namespace llvm { - /// DisableFramePointerElim - This returns true if frame pointer elimination - /// optimization should be disabled for the given machine function. - bool DisableFramePointerElim(const MachineFunction &MF) { - // Check to see if we should eliminate non-leaf frame pointers and then - // check to see if we should eliminate all frame pointers. - if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->hasCalls(); - } - - return NoFramePointerElim; - } - - /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option - /// is specified on the command line. When this flag is off(default), the - /// code generator is not allowed to generate mad (multiply add) if the - /// result is "less precise" than doing those operations individually. - bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; } - - /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume - /// that the rounding mode of the FPU can change from its default. - bool HonorSignDependentRoundingFPMath() { - return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; - } - - /// getTrapFunctionName - If this returns a non-empty string, this means isel - /// should lower Intrinsic::trap to a call to the specified function name - /// instead of an ISD::TRAP node. - StringRef getTrapFunctionName() { - return TrapFuncName; - } -} diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 67239b8..2689837 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -13,8 +13,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt index 94aca7a..47489bb 100644 --- a/lib/Target/X86/AsmParser/CMakeLists.txt +++ b/lib/Target/X86/AsmParser/CMakeLists.txt @@ -5,12 +5,4 @@ add_llvm_library(LLVMX86AsmParser X86AsmParser.cpp ) -add_llvm_library_dependencies(LLVMX86AsmParser - LLVMMC - LLVMMCParser - LLVMSupport - LLVMX86Desc - LLVMX86Info - ) - add_dependencies(LLVMX86AsmParser X86CommonTableGen) diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt index 6c2405a..9f94d5d 100644 --- a/lib/Target/X86/AsmParser/LLVMBuild.txt +++ b/lib/Target/X86/AsmParser/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86AsmParser parent = X86 required_libraries = MC MCParser Support X86Desc X86Info add_to_library_groups = X86 - diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4542d4b..be15899 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -51,21 +51,6 @@ endif() add_llvm_target(X86CodeGen ${sources}) -add_llvm_library_dependencies(LLVMX86CodeGen - LLVMAnalysis - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - LLVMX86AsmPrinter - LLVMX86Desc - LLVMX86Info - LLVMX86Utils - ) - add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt 
b/lib/Target/X86/Disassembler/CMakeLists.txt index 4f570d5..0cd6db9 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler X86DisassemblerDecoder.c ) -add_llvm_library_dependencies(LLVMX86Disassembler - LLVMMC - LLVMSupport - LLVMX86Info - ) - # workaround for hanging compilation on MSVC9 and 10 if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt index cd748cf..cac7adf 100644 --- a/lib/Target/X86/Disassembler/LLVMBuild.txt +++ b/lib/Target/X86/Disassembler/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Disassembler parent = X86 required_libraries = MC Support X86Info add_to_library_groups = X86 - diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt index 2a2b5db..28e2460 100644 --- a/lib/Target/X86/InstPrinter/CMakeLists.txt +++ b/lib/Target/X86/InstPrinter/CMakeLists.txt @@ -6,10 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter X86InstComments.cpp ) -add_llvm_library_dependencies(LLVMX86AsmPrinter - LLVMMC - LLVMSupport - LLVMX86Utils - ) - add_dependencies(LLVMX86AsmPrinter X86CommonTableGen) diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt index fb01323..6868dde 100644 --- a/lib/Target/X86/InstPrinter/LLVMBuild.txt +++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86AsmPrinter parent = X86 required_libraries = MC Support X86Utils add_to_library_groups = X86 - diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 6e87efa..6e4b1b9 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -106,28 +106,92 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. case X86::PUNPCKHBWrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(16, ShuffleMask); + DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKHBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHBWrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKHBWYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHBWYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKHWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKHWDrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(8, ShuffleMask); + DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKHWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHWDrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKHWDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VPUNPCKHWDYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKHDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKHDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(4, ShuffleMask); + DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKHDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKHDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKHQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKHQDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKHMask(2, ShuffleMask); + DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKHQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHQDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKHQDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKHQDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(MVT::v4i64, ShuffleMask); break; case X86::PUNPCKLBWrr: @@ -135,42 +199,117 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. case X86::PUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLBWMask(16, ShuffleMask); + DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKLBWrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLBWrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); + break; + case X86::VPUNPCKLBWYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLBWYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v32i8, ShuffleMask); break; case X86::PUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLWDMask(8, ShuffleMask); + DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKLWDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLWDrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); + break; + case X86::VPUNPCKLWDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VPUNPCKLWDYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v16i16, ShuffleMask); break; case X86::PUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLDQMask(4, ShuffleMask); + DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKLDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); + break; + case X86::VPUNPCKLDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v8i32, ShuffleMask); break; case X86::PUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLQDQMask(2, ShuffleMask); + DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKLQDQrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLQDQrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); + break; + case X86::VPUNPCKLQDQYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VPUNPCKLQDQYrm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(MVT::v4i64, ShuffleMask); break; case X86::SHUFPDrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::SHUFPDrmi: - DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VSHUFPDrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VSHUFPDrmi: - DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VSHUFPDYrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPDYrmi: + DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -179,14 +318,25 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::SHUFPSrmi: - DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VSHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
case X86::VSHUFPSrmi: - DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VSHUFPSYrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPSYrmi: + DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -195,14 +345,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPDrm: - DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDrm: - DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -210,7 +360,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDYrm: - DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); + DecodeUNPCKLMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -218,14 +368,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPSrm: - DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSrm: - DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -233,7 +383,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSYrm: - DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask); + DecodeUNPCKLMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -241,14 +391,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPDrm: - DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPDrm: - DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); + DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -256,7 +406,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
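// Editor's note, a hedged example of the SHUFP decoding used here
// (illustrative values only): DecodeSHUFPMask(MVT::v2f64, Imm) reads one
// selector bit per element, so Imm = 0b01 yields mask <1, 2>, printed as
//   xmm0 = xmm1[1],xmm2[0]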
case X86::VUNPCKHPDYrm: - DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); + DecodeUNPCKHMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -264,14 +414,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPSrm: - DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPSrm: - DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); + DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; @@ -279,34 +429,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKHPSYrm: - DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask); + DecodeUNPCKHMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPSri: - DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPSmi: + DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPSYri: - DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPSYmi: + DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDri: - DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPDmi: + DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERMILPDYri: - DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::VPERMILPDYmi: + DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VPERM2F128rr: - DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); + case X86::VPERM2I128rr: Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
+ case X86::VPERM2F128rm: + case X86::VPERM2I128rm: + DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(), + ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; } diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt index 514566c..87305e0 100644 --- a/lib/Target/X86/LLVMBuild.txt +++ b/lib/Target/X86/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils + [component_0] type = TargetGroup name = X86 @@ -30,4 +33,3 @@ name = X86CodeGen parent = X86 required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils add_to_library_groups = X86 - diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 264e791..ab2ebb4 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -6,13 +6,6 @@ add_llvm_library(LLVMX86Desc X86MachObjectWriter.cpp ) -add_llvm_library_dependencies(LLVMX86Desc - LLVMMC - LLVMSupport - LLVMX86AsmPrinter - LLVMX86Info - ) - add_dependencies(LLVMX86Desc X86CommonTableGen) # Hack: we need to include 'main' target directory to grab private headers diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt index 3d09301..9e1d29c 100644 --- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Desc parent = X86 required_libraries = MC Support X86AsmPrinter X86Info add_to_library_groups = X86 - diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 69ad7d7..87b2b05 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -107,6 +107,11 @@ public: bool MayNeedRelaxation(const MCInst &Inst) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; @@ -244,6 +249,14 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { return hasExp && !hasRIP; } +bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + // Relax if the value is too big for a (signed) i8. + return int64_t(Value) != int64_t(int8_t(Value)); +} + // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index c50f785..662ac1d 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -304,6 +304,12 @@ namespace X86II { // TAXD - Prefix before and after 0x0F. Combination of TA and XD. TAXD = 19 << Op0Shift, + // XOP8 - Prefix to include use of imm byte. + XOP8 = 20 << Op0Shift, + + // XOP9 - Prefix to exclude use of imm byte. + XOP9 = 21 << Op0Shift, + //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. 
// They are used to specify GPRs and SSE registers, 64-bit operand size, @@ -418,7 +424,16 @@ namespace X86II { /// storing a classifier in the imm8 field. To simplify our implementation, /// we handle this by storing the classifier in the opcode field and using /// this flag to indicate that the encoder should do the wacky 3DNow! thing. - Has3DNow0F0FOpcode = 1U << 7 + Has3DNow0F0FOpcode = 1U << 7, + + /// XOP_W - Same bit as VEX_W. Used to indicate swapping of + /// operand 3 and 4 to be encoded in ModRM or I8IMM. This is used + /// for FMA4 and XOP instructions. + XOP_W = 1U << 8, + + /// XOP - Opcode prefix used by XOP instructions. + XOP = 1U << 9 + }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the @@ -488,9 +503,12 @@ namespace X86II { return 0; case X86II::MRMSrcMem: { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W; unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). + if (HasXOP_W) + ++FirstMemOp;// Skip the register source (which is encoded in I8IMM). // FIXME: Maybe lea should have its own form? This is a horrible hack. //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 2703100..eb64ad1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -125,7 +125,19 @@ getNonexecutableStackSection(MCContext &Ctx) const { 0, SectionKind::getMetadata()); } -X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { +X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { + if (Triple.getArch() == Triple::x86_64) { + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + } + + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + TextAlignFillValue = 0x90; +} + +X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { GlobalPrefix = ""; PrivateGlobalPrefix = ".L"; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index 2cd4c8e..5d619e8 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h @@ -38,8 +38,12 @@ namespace llvm { virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; - struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { - explicit X86MCAsmInfoCOFF(const Triple &Triple); + struct X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + explicit X86MCAsmInfoMicrosoft(const Triple &Triple); + }; + + struct X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { + explicit X86MCAsmInfoGNUCOFF(const Triple &Triple); }; } // namespace llvm diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 1ab469c..8e14cb1 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -169,23 +169,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { return false; } -/// StartsWithGlobalOffsetTable - Return true for the simple cases where this -/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support -/// PIC on ELF i386 as that symbol is magic.
We check only simple case that +/// StartsWithGlobalOffsetTable - Check if this expression starts with +/// _GLOBAL_OFFSET_TABLE_ and if it is of the form +/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF +/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only the simple cases that /// are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start /// of a binary expression. -static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) { +enum GlobalOffsetTableExprKind { + GOT_None, + GOT_Normal, + GOT_SymDiff +}; +static GlobalOffsetTableExprKind +StartsWithGlobalOffsetTable(const MCExpr *Expr) { + const MCExpr *RHS = 0; if (Expr->getKind() == MCExpr::Binary) { const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr); Expr = BE->getLHS(); + RHS = BE->getRHS(); } if (Expr->getKind() != MCExpr::SymbolRef) - return false; + return GOT_None; const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); const MCSymbol &S = Ref->getSymbol(); - return S.getName() == "_GLOBAL_OFFSET_TABLE_"; + if (S.getName() != "_GLOBAL_OFFSET_TABLE_") + return GOT_None; + if (RHS && RHS->getKind() == MCExpr::SymbolRef) + return GOT_SymDiff; + return GOT_Normal; } void X86MCCodeEmitter:: @@ -209,12 +222,15 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If we have an immoffset, add it to the expression. if ((FixupKind == FK_Data_4 || - FixupKind == MCFixupKind(X86::reloc_signed_4byte)) && - StartsWithGlobalOffsetTable(Expr)) { - assert(ImmOffset == 0); - - FixupKind = MCFixupKind(X86::reloc_global_offset_table); - ImmOffset = CurByte; + FixupKind == MCFixupKind(X86::reloc_signed_4byte))) { + GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr); + if (Kind != GOT_None) { + assert(ImmOffset == 0); + + FixupKind = MCFixupKind(X86::reloc_global_offset_table); + if (Kind == GOT_Normal) + ImmOffset = CurByte; + } } // If the fixup is pc-relative, we need to bias the value to be relative to @@ -415,6 +431,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // opcode extension, or ignored, depending on the opcode byte) unsigned char VEX_W = 0; + // XOP_W: opcode specific, same bit as VEX_W, but used to + // swap operand 3 and 4 for FMA4 and XOP instructions + unsigned char XOP_W = 0; + + // XOP: Use XOP prefix byte 0x8f instead of VEX.
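// Editor's note, a hedged sketch of the 3-byte prefix layout emitted further
// down (field names follow the code; byte values are illustrative):
//   byte 0: 0x8F for XOP, 0xC4 for 3-byte VEX
//   byte 1: R, X, B bits plus m-mmmmm (VEX_5M; 0x8/0x9 pick the XOP maps)
//   byte 2: (VEX_W | XOP_W) << 7 | vvvv | L | pp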
+ unsigned char XOP = 0; + // VEX_5M (VEX m-mmmmm field): // // 0b00000: Reserved for future use @@ -422,7 +445,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // 0b00001: implied 0F leading opcode bytes // 0b00010: implied 0F 38 leading opcode bytes // 0b00011: implied 0F 3A leading opcode bytes // 0b00100-0b11111: Reserved for future use - // + // 0b01000: XOP map select - 08h instructions with imm byte + // 0b01001: XOP map select - 09h instructions with no imm byte unsigned char VEX_5M = 0x1; // VEX_4V (VEX vvvv field): a register specifier @@ -453,6 +477,12 @@ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) VEX_W = 1; + if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W) + XOP_W = 1; + + if ((TSFlags >> X86II::VEXShift) & X86II::XOP) + XOP = 1; + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) VEX_L = 1; @@ -482,6 +512,12 @@ case X86II::XD: // F2 0F VEX_PP = 0x3; break; + case X86II::XOP8: + VEX_5M = 0x8; + break; + case X86II::XOP9: + VEX_5M = 0x9; + break; case X86II::A6: // Bypass: Not used by VEX case X86II::A7: // Bypass: Not used by VEX case X86II::TB: // Bypass: Not used by VEX @@ -489,6 +525,7 @@ break; // No prefix! } + // Set the vector length to 256-bit if YMM0-YMM15 is used for (unsigned i = 0; i != MI.getNumOperands(); ++i) { if (!MI.getOperand(i).isReg()) @@ -529,6 +566,9 @@ // src1(ModR/M), MemAddr, imm8 // src1(ModR/M), MemAddr, src2(VEX_I8IMM) // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) VEX_R = 0x0; @@ -620,16 +660,16 @@ // unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); - if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix + if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix EmitByte(0xC5, CurByte, OS); EmitByte(LastByte | (VEX_R << 7), CurByte, OS); return; } // 3 byte VEX prefix - EmitByte(0xC4, CurByte, OS); + EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS); EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); - EmitByte(LastByte | (VEX_W << 7), CurByte, OS); + EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS); } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 @@ -889,6 +929,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the VEX.VVVV field? bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W; + unsigned XOP_W_I8IMMOperand = 2; // Determine where the memory operand starts, if present.
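// (Editorial note, hedged: XOP_W_I8IMMOperand above is the MCInst index of
// the source that moves into the imm8 when XOP_W is set. Echoing the FMA4
// comment in EmitVEXOpcodePrefix: without XOP_W the final source register is
// encoded in the imm8's high nibble; with XOP_W, sources 3 and 4 trade
// places, so the operand at index 2 goes to the imm8 and the last operand is
// encoded in ModRM instead.)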
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); @@ -961,9 +1003,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) SrcRegNum++; + if (HasXOP_W) // Skip 2nd src (which is encoded in I8IMM) + SrcRegNum++; + EmitRegModRMByte(MI.getOperand(SrcRegNum), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); - CurOp = SrcRegNum + 1; + + // 2 operands skipped with HasXOP_W, compensate accordingly + CurOp = HasXOP_W ? SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) ++CurOp; break; @@ -975,6 +1022,8 @@ ++AddrOperands; ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). } + if (HasXOP_W) // Skip second register source (encoded in I8IMM) + ++FirstMemOp; EmitByte(BaseOpcode, CurByte, OS); @@ -1062,12 +1111,24 @@ // according to the right size for the instruction. if (CurOp != NumOps) { // The last source register of a 4 operand instruction in AVX is encoded - // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. + // in bits[7:4] of an immediate byte. if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { - const MCOperand &MO = MI.getOperand(CurOp++); + const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand + : CurOp); + CurOp++; bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg()); unsigned RegNum = (IsExtReg ? (1 << 7) : 0); RegNum |= GetX86RegNum(MO) << 4; + // If there is an additional 5th operand it must be an immediate, which + // is encoded in bits[3:0] + if (CurOp != NumOps) { + const MCOperand &MIMM = MI.getOperand(CurOp++); + if (MIMM.isImm()) { + unsigned Val = MIMM.getImm(); + assert(Val < 16 && "Immediate operand value out of range"); + RegNum |= Val; + } + } EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, Fixups); } else { diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index a843515..f2a34ed 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -361,8 +361,10 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { MAI = new X86_64MCAsmInfoDarwin(TheTriple); else MAI = new X86MCAsmInfoDarwin(TheTriple); - } else if (TheTriple.isOSWindows()) { - MAI = new X86MCAsmInfoCOFF(TheTriple); + } else if (TheTriple.getOS() == Triple::Win32) { + MAI = new X86MCAsmInfoMicrosoft(TheTriple); + } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) { + MAI = new X86MCAsmInfoGNUCOFF(TheTriple); } else { MAI = new X86ELFMCAsmInfo(TheTriple); } diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 7d901af..a581993 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -922,16 +922,3 @@ _test2: ## @test2 The insertps's of $0 are pointless complex copies. //===---------------------------------------------------------------------===// - -If SSE4.1 is available we should inline rounding functions instead of emitting -a libcall. - -floor: roundsd $0x01, %xmm, %xmm -ceil: roundsd $0x02, %xmm, %xmm - -and likewise for the single precision versions. - -Currently, SelectionDAGBuilder doesn't turn calls to these functions into the -corresponding nodes and some targets (including X86) aren't ready for them.
- -//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt index 4da00fa..b1d0b9f 100644 --- a/lib/Target/X86/TargetInfo/CMakeLists.txt +++ b/lib/Target/X86/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMX86Info X86TargetInfo.cpp ) -add_llvm_library_dependencies(LLVMX86Info - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMX86Info X86CommonTableGen) diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt index ee015bd..3c64a22 100644 --- a/lib/Target/X86/TargetInfo/LLVMBuild.txt +++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Info parent = X86 required_libraries = MC Support Target add_to_library_groups = X86 - diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt index caffd8b..2e72c34 100644 --- a/lib/Target/X86/Utils/CMakeLists.txt +++ b/lib/Target/X86/Utils/CMakeLists.txt @@ -4,9 +4,4 @@ add_llvm_library(LLVMX86Utils X86ShuffleDecode.cpp ) -add_llvm_library_dependencies(LLVMX86Utils - LLVMCore - LLVMSupport - ) - add_dependencies(LLVMX86Utils X86CommonTableGen) diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt index 3ee441e..de0a30f 100644 --- a/lib/Target/X86/Utils/LLVMBuild.txt +++ b/lib/Target/X86/Utils/LLVMBuild.txt @@ -21,4 +21,3 @@ name = X86Utils parent = X86 required_libraries = Core Support add_to_library_groups = X86 - diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index f6c9d7b..e7631b6 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -95,54 +95,31 @@ void DecodePSHUFLWMask(unsigned Imm, ShuffleMask.push_back(7); } -void DecodePUNPCKLBWMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask); -} - -void DecodePUNPCKLWDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask); -} - -void DecodePUNPCKLDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); -} - -void DecodePUNPCKLQDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); -} - -void DecodePUNPCKLMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(VT, ShuffleMask); -} +void DecodeSHUFPMask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); -void DecodePUNPCKHMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i+NElts/2); - ShuffleMask.push_back(i+NElts+NElts/2); - } -} + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; -void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - // Part that reads from dest. - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(Imm % NElts); - Imm /= NElts; - } - // Part that reads from src. - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(Imm % NElts + NElts); - Imm /= NElts; + int NewImm = Imm; + for (unsigned l = 0; l < NumLanes; ++l) { + unsigned LaneStart = l * NumLaneElts; + // Part that reads from dest. 
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) { + ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart); + NewImm /= NumLaneElts; + } + // Part that reads from src. + for (unsigned i = 0; i != NumLaneElts/2; ++i) { + ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart); + NewImm /= NumLaneElts; + } + if (NumLaneElts == 4) NewImm = Imm; // reload imm } } -void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -161,10 +138,10 @@ void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } } -/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -183,36 +160,23 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } } -// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes and the mask of the first lane must -// be the same of the second. -void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - unsigned NumLanes = (NumElts*32)/128; - unsigned LaneSize = NumElts/NumLanes; - - for (unsigned l = 0; l != NumLanes; ++l) { - for (unsigned i = 0; i != LaneSize; ++i) { - unsigned Idx = (Imm >> (i*2)) & 0x3 ; - ShuffleMask.push_back(Idx+(l*LaneSize)); - } - } -} +// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes for any 128-bit +// vector of 32-bit or 64-bit elements. For 256-bit vectors, it's considered +// as two 128-bit lanes. For VPERMILPS, referenced elements can't cross lanes +// and the mask of the first lane must be the same as the second. +void DecodeVPERMILPMask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); -// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes but the mask of the first lane can -// be the different of the second (not like VPERMILPS). -void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { - unsigned NumLanes = (NumElts*64)/128; - unsigned LaneSize = NumElts/NumLanes; + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; - for (unsigned l = 0; l < NumLanes; ++l) { - for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) { - unsigned Idx = (Imm >> i) & 0x1; - ShuffleMask.push_back(Idx+(l*LaneSize)); + for (unsigned l = 0; l != NumLanes; ++l) { + unsigned LaneStart = l*NumLaneElts; + for (unsigned i = 0; i != NumLaneElts; ++i) { + unsigned Idx = NumLaneElts == 4 ?
(Imm >> (i*2)) & 0x3 + : (Imm >> (i+LaneStart)) & 0x1; + ShuffleMask.push_back(Idx+LaneStart); } } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 35f6530..243728f 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -46,50 +46,25 @@ void DecodePSHUFHWMask(unsigned Imm, void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); -void DecodePUNPCKLBWMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLWDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLQDQMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKLMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodePUNPCKHMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeSHUFPMask(EVT VT, unsigned Imm, + SmallVectorImpl<unsigned> &ShuffleMask); -/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd +/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); -/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd +/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); - +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); -// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes and the mask of the first lane must -// be the same of the second. -void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); -// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit -// elements. For 256-bit vectors, it's considered as two 128 lanes, the -// referenced elements can't cross lanes but the mask of the first lane can -// be the different of the second (not like VPERMILPS). -void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm, +// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes for any 128-bit +// vector of 32-bit or 64-bit elements. For 256-bit vectors, it's considered +// as two 128-bit lanes. For VPERMILPS, referenced elements can't cross lanes +// and the mask of the first lane must be the same as the second.
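// (Worked example, added editorially: for v8f32 with Imm = 0x1B (0b00011011),
// the 2-bit fields select <3,2,1,0> inside each 128-bit lane, so the decoded
// mask is <3,2,1,0, 7,6,5,4> -- the high lane replays the same pattern,
// offset by the lane start, exactly as the rule above requires.)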
+void DecodeVPERMILPMask(EVT VT, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); void DecodeVPERM2F128Mask(unsigned Imm, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 62a7016..8229ca5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -91,6 +91,8 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", "Enable three-operand fused multiple-add">; def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", "Enable four-operand fused multiple-add">; +def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", + "Enable XOP instructions">; def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; @@ -194,14 +196,16 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, + Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, FeatureSlowBTMem]>; -def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B, - FeatureSSE4A]>; -def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A]>; +// FIXME: Disabling AVX for now since it's not ready. +def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureAES, FeatureCLMUL, FeatureFMA4, + FeatureXOP, FeatureLZCNT]>; +def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B, + FeatureAES, FeatureCLMUL, FeatureFMA4, + FeatureXOP, FeatureF16C, FeatureLZCNT, + FeatureBMI]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 77b9905..aab2a05 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -158,10 +158,15 @@ def CC_X86_64_C : CallingConv<[ CCIfSubtarget<"hasXMM()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - // The first 8 256-bit vector arguments are passed in YMM registers. - CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - CCIfSubtarget<"hasAVX()", - CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>, + // The first 8 256-bit vector arguments are passed in YMM registers, unless + // this is a vararg function. + // FIXME: This isn't precisely correct; the x86-64 ABI document says that + // fixed arguments to vararg functions are supposed to be passed in + // registers. Actually modeling that would be a lot of work, though. + CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, + YMM4, YMM5, YMM6, YMM7]>>>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. 
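As a concrete illustration of the CCIfNotVarArg rule added above (an editorial example, not code from this patch), consider a hypothetical prototype:

    #include <immintrin.h>
    __m256 sum8(__m256 fixed, ...);  /* 'fixed' is a named 256-bit argument */

Per the FIXME in the patch, the x86-64 ABI would still assign 'fixed' to YMM0 because it is a fixed (named) parameter; with the conservative rule above, every 256-bit vector argument to a vararg function is instead passed in memory.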
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ba615a8..ed16e88 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -1004,7 +1004,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, break; } - if (!Desc->isVariadic() && CurOp != NumOps) { + if (!MI.isVariadic() && CurOp != NumOps) { #ifndef NDEBUG dbgs() << "Cannot encode all operands of: " << MI << "\n"; #endif diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 32f1770..1589439 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -728,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && GuaranteedTailCallOpt) + if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) return false; // Let SDISel handle vararg functions. @@ -1529,7 +1529,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && GuaranteedTailCallOpt) + if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) return false; PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); @@ -1543,7 +1543,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Fast-isel doesn't know about callee-pop yet. if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg, - GuaranteedTailCallOpt)) + TM.Options.GuaranteedTailCallOpt)) return false; // Check whether the function can return without sret-demotion. @@ -2121,7 +2121,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { default: return false; case MVT::f32: if (X86ScalarSSEf32) { - Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS; + Opc = X86::FsFLD0SS; RC = X86::FR32RegisterClass; } else { Opc = X86::LD_Fp032; @@ -2130,7 +2130,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { break; case MVT::f64: if (X86ScalarSSEf64) { - Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD; + Opc = X86::FsFLD0SD; RC = X86::FR64RegisterClass; } else { Opc = X86::LD_Fp064; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 819d242..6a40cc1 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -47,7 +47,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { const MachineModuleInfo &MMI = MF.getMMI(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); - return (DisableFramePointerElim(MF) || + return (MF.getTarget().Options.DisableFramePointerElim(MF) || RI->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || @@ -210,7 +210,7 @@ static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, uint64_t *NumBytes = NULL) { - // FIXME: THIS ISN'T RUN!!! + // FIXME: THIS ISN'T RUN!!! return; if (MBBI == MBB.end()) return; @@ -351,20 +351,22 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, /// register. The number corresponds to the enum lists in /// compact_unwind_encoding.h. 
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) { - int Idx = 1; - for (; *CURegs; ++CURegs, ++Idx) + for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; return -1; } +// Number of registers that can be saved in a compact unwind encoding. +#define CU_NUM_SAVED_REGS 6 + /// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding /// used with frameless stacks. It is passed the number of registers to be saved /// and an array of the registers saved. -static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], - unsigned RegCount, - bool Is64Bit) { +static uint32_t +encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], + unsigned RegCount, bool Is64Bit) { // The saved registers are numbered from 1 to 6. In order to encode the order // in which they were saved, we re-number them according to their place in the // register order. The re-numbering is relative to the last re-numbered @@ -385,14 +387,21 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], }; const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); - uint32_t RenumRegs[6]; - for (unsigned i = 6 - RegCount; i < 6; ++i) { + for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) { int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); if (CUReg == -1) return ~0U; SavedRegs[i] = CUReg; + } + + // Reverse the list. + std::swap(SavedRegs[0], SavedRegs[5]); + std::swap(SavedRegs[1], SavedRegs[4]); + std::swap(SavedRegs[2], SavedRegs[3]); + uint32_t RenumRegs[CU_NUM_SAVED_REGS]; + for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) { unsigned Countless = 0; - for (unsigned j = 6 - RegCount; j < i; ++j) + for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) if (SavedRegs[j] < SavedRegs[i]) ++Countless; @@ -435,8 +444,9 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], /// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a /// compact encoding with a frame pointer. -static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6], - bool Is64Bit) { +static uint32_t +encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS], + bool Is64Bit) { static const unsigned CU32BitRegs[] = { X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 }; @@ -448,13 +458,16 @@ static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6], // Encode the registers in the order they were saved, 3 bits per register. The // registers are numbered from 1 to 6. uint32_t RegEnc = 0; - for (int I = 5; I >= 0; --I) { + for (int I = 0; I != 6; ++I) { unsigned Reg = SavedRegs[I]; if (Reg == 0) break; int CURegNum = getCompactUnwindRegNum(CURegs, Reg); if (CURegNum == -1) return ~0U; - RegEnc |= (CURegNum & 0x7) << (5 - I); + + // Encode the 3-bit register number in order, skipping over 3 bits for each + // register.
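// (Editorial illustration, assuming the '++I' loop fix above: the
// first-saved register occupies the highest 3-bit field, shift (5-0)*3, the
// next one shift 12, and so on down toward bit 0; each field holds the
// register's compact number from getCompactUnwindRegNum, e.g. 1 for EBX and
// 5 for ESI in the 32-bit table.)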
+ RegEnc |= (CURegNum & 0x7) << ((5 - I) * 3); } assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!"); @@ -466,14 +479,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - bool Is64Bit = STI.is64Bit(); bool HasFP = hasFP(MF); - unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; - int SavedRegIdx = 6; + unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 }; + unsigned SavedRegIdx = 0; unsigned OffsetSize = (Is64Bit ? 8 : 4); @@ -481,14 +491,13 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { unsigned PushInstrSize = 1; unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr); unsigned MoveInstrSize = (Is64Bit ? 3 : 2); - unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta); unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2); unsigned StackDivide = (Is64Bit ? 8 : 4); unsigned InstrOffset = 0; - unsigned CFAOffset = 0; unsigned StackAdjust = 0; + unsigned StackSize = 0; MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB. bool ExpectEnd = false; @@ -504,10 +513,10 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (Opc == PushInstr) { // If there are too many saved registers, we cannot use compact encoding. - if (--SavedRegIdx < 0) return 0; + if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0; - SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg(); - CFAOffset += OffsetSize; + SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg(); + StackAdjust += OffsetSize; InstrOffset += PushInstrSize; } else if (Opc == MoveInstr) { unsigned SrcReg = MI.getOperand(1).getReg(); @@ -516,13 +525,14 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { if (DstReg != FramePtr || SrcReg != StackPtr) return 0; - CFAOffset = 0; + StackAdjust = 0; memset(SavedRegs, 0, sizeof(SavedRegs)); - SavedRegIdx = 6; + SavedRegIdx = 0; InstrOffset += MoveInstrSize; - } else if (Opc == SubtractInstr) { - if (StackAdjust) - // We all ready have a stack pointer adjustment. + } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || + Opc == X86::SUB32ri || Opc == X86::SUB32ri8) { + if (StackSize) + // We already have a stack size. return 0; if (!MI.getOperand(0).isReg() || @@ -533,7 +543,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // %RSP<def> = SUB64ri8 %RSP, 48 return 0; - StackAdjust = MI.getOperand(2).getImm() / StackDivide; + StackSize = MI.getOperand(2).getImm() / StackDivide; SubtractInstrIdx += InstrOffset; ExpectEnd = true; } @@ -541,28 +551,30 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // Encode that we are using EBP/RBP as the frame pointer. uint32_t CompactUnwindEncoding = 0; - CFAOffset /= StackDivide; + StackAdjust /= StackDivide; if (HasFP) { - if ((CFAOffset & 0xFF) != CFAOffset) + if ((StackAdjust & 0xFF) != StackAdjust) // Offset was too big for compact encoding. return 0; // Get the encoding of the saved registers when we have a frame pointer. 
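// (Editorial summary of the word assembled below: the frame-pointer case
// sets mode 0x01000000, puts the stack adjustment in bits 16-23 and the
// register permutation in the low 15 bits; the small frameless case sets
// mode 0x02000000 with the stack size in bits 16-23, the saved-register
// count in bits 10-12, and the register encoding in the low 10 bits.)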
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); - if (RegEnc == ~0U) - return 0; + if (RegEnc == ~0U) return 0; CompactUnwindEncoding |= 0x01000000; - CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16; + CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; CompactUnwindEncoding |= RegEnc & 0x7FFF; } else { - unsigned FullOffset = CFAOffset + StackAdjust; - if ((FullOffset & 0xFF) == FullOffset) { - // Frameless stack. + ++StackAdjust; + uint32_t TotalStackSize = StackAdjust + StackSize; + if ((TotalStackSize & 0xFF) == TotalStackSize) { + // Frameless stack with a small stack size. CompactUnwindEncoding |= 0x02000000; - CompactUnwindEncoding |= (FullOffset & 0xFF) << 16; + + // Encode the stack size. + CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16; } else { - if ((CFAOffset & 0x7) != CFAOffset) + if ((StackAdjust & 0x7) != StackAdjust) // The extra stack adjustments are too big for us to handle. return 0; @@ -573,16 +585,21 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { // instruction. CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; - // Encode any extra stack stack changes (done via push instructions). - CompactUnwindEncoding |= (CFAOffset & 0x7) << 13; + // Encode any extra stack adjustments (done via push instructions). + CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; } + // Encode the number of registers saved. + CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; + // Get the encoding of the saved registers when we don't have a frame // pointer. - uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs, - 6 - SavedRegIdx, - Is64Bit); + uint32_t RegEnc = + encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx, + Is64Bit); if (RegEnc == ~0U) return 0; + + // Encode the register encoding. CompactUnwindEncoding |= RegEnc & 0x3FF; } @@ -638,10 +655,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // stack pointer (we fit in the Red Zone). if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && - !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. - !IsWin64 && // Win64 has no Red Zone - !EnableSegmentedStacks) { // Regular stack + !MFI->hasVarSizedObjects() && // No dynamic alloca. + !MFI->adjustsStack() && // No calls. + !IsWin64 && // Win64 has no Red Zone + !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -978,7 +995,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, unsigned Opc = PI->getOpcode(); if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && - !PI->getDesc().isTerminator()) + !PI->isTerminator()) break; --MBBI; @@ -1306,6 +1323,10 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { } } +// The stack limit in the TCB is set to this many bytes above the actual stack +// limit.
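// (Editorial note: because the recorded limit already includes this slack,
// frames smaller than kSplitStackAvailable can compare the stack pointer
// against the TLS slot directly and skip the LEA that would otherwise
// materialize SP - StackSize; that is the fast path added below for both the
// 64-bit and 32-bit prologues.)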
+static const uint64_t kSplitStackAvailable = 256; + void X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MachineBasicBlock &prologueMBB = MF.front(); @@ -1360,16 +1381,24 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { TlsReg = X86::FS; TlsOffset = 0x70; - BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) - .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + if (StackSize < kSplitStackAvailable) + ScratchReg = X86::RSP; + else + BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else { TlsReg = X86::GS; TlsOffset = 0x30; - BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) - .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + if (StackSize < kSplitStackAvailable) + ScratchReg = X86::ESP; + else + BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } @@ -1394,9 +1423,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.getRegInfo().setPhysRegUsed(X86::R10); MF.getRegInfo().setPhysRegUsed(X86::R11); } else { - // Since we'll call __morestack, stack alignment needs to be preserved. - BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP) - .addImm(8); BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) @@ -1411,11 +1437,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) .addExternalSymbol("__morestack"); - // __morestack only seems to remove 8 bytes off the stack. Add back the - // additional 8 bytes we added before pushing the arguments. - if (!Is64Bit) - BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP) - .addImm(8); if (IsNested) BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); else diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 96c6f41..03727a2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -256,7 +256,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); - } else if (!UseSoftFloat) { + } else if (!TM.Options.UseSoftFloat) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. 
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); @@ -270,7 +270,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); - if (!UseSoftFloat) { + if (!TM.Options.UseSoftFloat) { // SSE has no i16 to fp conversion, only i32 if (X86ScalarSSEf32) { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); @@ -313,7 +313,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->is64Bit()) { setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - } else if (!UseSoftFloat) { + } else if (!TM.Options.UseSoftFloat) { // Since AVX is a superset of SSE3, only check for SSE here. if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) // Expand FP_TO_UINT into a select. @@ -378,6 +378,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i16 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i64 , Expand); if (Subtarget->hasBMI()) { setOperationAction(ISD::CTTZ , MVT::i8 , Promote); } else { @@ -388,6 +392,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i64 , Custom); } + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i16 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i64 , Expand); if (Subtarget->hasLZCNT()) { setOperationAction(ISD::CTLZ , MVT::i8 , Promote); } else { @@ -537,14 +545,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? MVT::i64 : MVT::i32, Custom); - else if (EnableSegmentedStacks) + else if (TM.Options.EnableSegmentedStacks) setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? MVT::i64 : MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? MVT::i64 : MVT::i32, Expand); - if (!UseSoftFloat && X86ScalarSSEf64) { + if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. // Set up the FP register classes. addRegisterClass(MVT::f32, X86::FR32RegisterClass); @@ -576,7 +584,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // cases we handle. addLegalFPImmediate(APFloat(+0.0)); // xorpd addLegalFPImmediate(APFloat(+0.0f)); // xorps - } else if (!UseSoftFloat && X86ScalarSSEf32) { + } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. addRegisterClass(MVT::f32, X86::FR32RegisterClass); @@ -605,11 +613,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS - if (!UnsafeFPMath) { + if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand); } - } else if (!UseSoftFloat) { + } else if (!TM.Options.UseSoftFloat) { // f32 and f64 in x87. // Set up the FP register classes. 
addRegisterClass(MVT::f64, X86::RFP64RegisterClass); @@ -620,7 +628,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - if (!UnsafeFPMath) { + if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f64 , Expand); setOperationAction(ISD::FCOS , MVT::f64 , Expand); } @@ -639,7 +647,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FMA, MVT::f32, Expand); // Long double always uses X87. - if (!UseSoftFloat) { + if (!TM.Options.UseSoftFloat) { addRegisterClass(MVT::f80, X86::RFP80RegisterClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -658,11 +666,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addLegalFPImmediate(TmpFlt2); // FLD1/FCHS } - if (!UnsafeFPMath) { + if (!TM.Options.UnsafeFPMath) { setOperationAction(ISD::FSIN , MVT::f80 , Expand); setOperationAction(ISD::FCOS , MVT::f80 , Expand); } + setOperationAction(ISD::FFLOOR, MVT::f80, Expand); + setOperationAction(ISD::FCEIL, MVT::f80, Expand); + setOperationAction(ISD::FTRUNC, MVT::f80, Expand); + setOperationAction(ISD::FRINT, MVT::f80, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); setOperationAction(ISD::FMA, MVT::f80, Expand); } @@ -714,7 +727,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand); @@ -748,7 +763,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. - if (!UseSoftFloat && Subtarget->hasMMX()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) { addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass); // No operations on x86mmx supported, everything uses intrinsics. 
} @@ -785,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); - if (!UseSoftFloat && Subtarget->hasXMM()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasXMM()) { addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); @@ -802,7 +817,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } - if (!UseSoftFloat && Subtarget->hasXMMInt()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasXMMInt()) { addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM @@ -983,7 +998,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (Subtarget->hasSSE42orAVX()) setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - if (!UseSoftFloat && Subtarget->hasAVX()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) { addRegisterClass(MVT::v32i8, X86::VR256RegisterClass); addRegisterClass(MVT::v16i16, X86::VR256RegisterClass); addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); @@ -1211,10 +1226,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - setPrefLoopAlignment(16); + setPrefLoopAlignment(4); // 2^4 bytes. benefitFromCodePlacementOpt = true; - setPrefFunctionAlignment(4); + setPrefFunctionAlignment(4); // 2^4 bytes. } @@ -1709,7 +1724,8 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { /// FuncIsMadeTailCallSafe - Return true if the function is being made into /// a tailcall target by changing its ABI. -static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { +static bool FuncIsMadeTailCallSafe(CallingConv::ID CC, + bool GuaranteedTailCallOpt) { return GuaranteedTailCallOpt && IsTailCallConvention(CC); } @@ -1723,7 +1739,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); + bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv, + getTargetMachine().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -1873,7 +1890,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned StackSize = CCInfo.getNextStackOffset(); // Align stack specially for tail calls. 
- if (FuncIsMadeTailCallSafe(CallConv)) + if (FuncIsMadeTailCallSafe(CallConv, + MF.getTarget().Options.GuaranteedTailCallOpt)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for @@ -1918,9 +1936,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); assert(!(NumXMMRegs && !Subtarget->hasXMM()) && "SSE register cannot be used when SSE is disabled!"); - assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) && + assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat && + NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); - if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM()) + if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps || + !Subtarget->hasXMM()) // Kernel mode asks for SSE to be disabled, so don't push them // on the stack. TotalNumXMMRegs = 0; @@ -1998,7 +2018,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } // Some CCs need callee pop. - if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) { + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, + MF.getTarget().Options.GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. @@ -2098,7 +2119,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Sibcalls are automatically detected tailcalls which do not require // ABI changes. - if (!GuaranteedTailCallOpt && isTailCall) + if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall) IsSibcall = true; if (isTailCall) @@ -2126,7 +2147,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; - else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv)) + else if (getTargetMachine().Options.GuaranteedTailCallOpt && + IsTailCallConvention(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; @@ -2305,7 +2327,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, int FI = 0; // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); - if (GuaranteedTailCallOpt) { + if (getTargetMachine().Options.GuaranteedTailCallOpt) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) @@ -2485,7 +2507,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPush; - if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) + if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, + getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) // If this is a call to a struct-return function, the callee @@ -2643,7 +2666,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; - if (GuaranteedTailCallOpt) { + if (getTargetMachine().Options.GuaranteedTailCallOpt) { if (IsTailCallConvention(CalleeCC) && CCMatch) return true; return false; @@ -2843,23 +2866,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPSY: - case X86ISD::VPERMILPD: - case X86ISD::VPERMILPDY: - case X86ISD::VPERM2F128: + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: + case X86ISD::VPERMILP: + case X86ISD::VPERM2X128: return true; } return false; @@ -2885,10 +2895,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFD: case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPSY: - case X86ISD::VPERMILPD: - case X86ISD::VPERMILPDY: + case X86ISD::VPERMILP: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2902,7 +2909,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: - case X86ISD::VPERM2F128: + case X86ISD::VPERM2X128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } @@ -2920,18 +2927,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVLPD: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: return DAG.getNode(Opc, dl, VT, V1, V2); } return SDValue(); @@ -3231,7 +3228,7 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) { static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, bool hasSSSE3OrAVX) { int i, e = VT.getVectorNumElements(); - if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64) + if (VT.getSizeInBits() != 128) return false; // Do not handle v2i64 / v2f64 shuffles with palignr. @@ -3261,17 +3258,17 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand +/// isVSHUFPYMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 256-bit /// VSHUFPSY. 
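/// (Editorial illustration: the merged isVSHUFPYMask below accepts both the
/// four- and eight-element 256-bit forms; e.g. for v8f32 the mask
/// <0,1,8,9, 4,5,12,13> -- two elements from V1 then two from V2 in each
/// 128-bit lane -- is accepted, and corresponds to vshufps $0x44.)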
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { +static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX, bool Commuted = false) { int NumElems = VT.getVectorNumElements(); - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) + if (!HasAVX || VT.getSizeInBits() != 256) return false; - if (NumElems != 8) + if (NumElems != 4 && NumElems != 8) return false; // VSHUFPSY divides the resulting vector into 4 chunks. @@ -3284,124 +3281,63 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT, // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4, // Y3..Y0, Y3..Y0, X3..X0, X3..X0 // - int QuarterSize = NumElems/4; - int HalfSize = QuarterSize*2; - for (int i = 0; i < QuarterSize; ++i) - if (!isUndefOrInRange(Mask[i], 0, HalfSize)) - return false; - for (int i = QuarterSize; i < QuarterSize*2; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize)) - return false; - - // The mask of the second half must be the same as the first but with - // the appropriate offsets. This works in the same way as VPERMILPS - // works with masks. - for (int i = QuarterSize*2; i < QuarterSize*3; ++i) { - if (!isUndefOrInRange(Mask[i], HalfSize, NumElems)) - return false; - int FstHalfIdx = i-HalfSize; - if (Mask[FstHalfIdx] < 0) - continue; - if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize)) - return false; - } - for (int i = QuarterSize*3; i < NumElems; ++i) { - if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2)) - return false; - int FstHalfIdx = i-HalfSize; - if (Mask[FstHalfIdx] < 0) - continue; - if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize)) - return false; - - } - - return true; -} - -/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VSHUFPSY instruction. -static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - EVT VT = SVOp->getValueType(0); - int NumElems = VT.getVectorNumElements(); - - assert(NumElems == 8 && VT.getSizeInBits() == 256 && - "Only supports v8i32 and v8f32 types"); - - int HalfSize = NumElems/2; - unsigned Mask = 0; - for (int i = 0; i != NumElems ; ++i) { - if (SVOp->getMaskElt(i) < 0) - continue; - // The mask of the first half must be equal to the second one. - unsigned Shamt = (i%HalfSize)*2; - unsigned Elt = SVOp->getMaskElt(i) % HalfSize; - Mask |= Elt << Shamt; - } - - return Mask; -} - -/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to 256-bit -/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS -/// version and the mask of the second half isn't binded with the first -/// one. -static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - int NumElems = VT.getVectorNumElements(); - - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) - return false; - - if (NumElems != 4) - return false; - - // VSHUFPSY divides the resulting vector into 4 chunks. + // VSHUFPDY divides the resulting vector into 4 chunks. // The sources are also split into 4 chunks, and each destination // chunk must come from a different source chunk.
// // SRC1 => X3 X2 X1 X0 // SRC2 => Y3 Y2 Y1 Y0 // - // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0 + // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0 // - int QuarterSize = NumElems/4; - int HalfSize = QuarterSize*2; - for (int i = 0; i < QuarterSize; ++i) - if (!isUndefOrInRange(Mask[i], 0, HalfSize)) - return false; - for (int i = QuarterSize; i < QuarterSize*2; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize)) - return false; - for (int i = QuarterSize*2; i < QuarterSize*3; ++i) - if (!isUndefOrInRange(Mask[i], HalfSize, NumElems)) - return false; - for (int i = QuarterSize*3; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2)) - return false; + unsigned QuarterSize = NumElems/4; + unsigned HalfSize = QuarterSize*2; + for (unsigned l = 0; l != 2; ++l) { + unsigned LaneStart = l*HalfSize; + for (unsigned s = 0; s != 2; ++s) { + unsigned QuarterStart = s*QuarterSize; + unsigned Src = (Commuted) ? (1-s) : s; + unsigned SrcStart = Src*NumElems + LaneStart; + for (unsigned i = 0; i != QuarterSize; ++i) { + int Idx = Mask[i+QuarterStart+LaneStart]; + if (!isUndefOrInRange(Idx, SrcStart, SrcStart+HalfSize)) + return false; + // For VSHUFPSY, the mask of the second half must be the same as the first + // but with the appropriate offsets. This works in the same way as + // VPERMILPS works with masks. + if (NumElems == 4 || l == 0 || Mask[i+QuarterStart] < 0) + continue; + if (!isUndefOrEqual(Idx, Mask[i+QuarterStart]+HalfSize)) + return false; + } + } + } return true; } -/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VSHUFPDY instruction. -static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { +/// getShuffleVSHUFPYImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VSHUFPSY/VSHUFPDY instructions. +static unsigned getShuffleVSHUFPYImmediate(SDNode *N) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); EVT VT = SVOp->getValueType(0); int NumElems = VT.getVectorNumElements(); - assert(NumElems == 4 && VT.getSizeInBits() == 256 && - "Only supports v4i64 and v4f64 types"); + assert(VT.getSizeInBits() == 256 && "Only supports 256-bit types"); + assert((NumElems == 4 || NumElems == 8) && "Only supports v4 and v8 types"); int HalfSize = NumElems/2; + unsigned Mul = (NumElems == 8) ? 2 : 1; unsigned Mask = 0; - for (int i = 0; i != NumElems ; ++i) { - if (SVOp->getMaskElt(i) < 0) + for (int i = 0; i != NumElems; ++i) { + int Elt = SVOp->getMaskElt(i); + if (Elt < 0) continue; - int Elt = SVOp->getMaskElt(i) % HalfSize; - Mask |= Elt << i; + Elt %= HalfSize; + unsigned Shamt = i; + // For VSHUFPSY, the mask of the first half must be equal to the second one. + if (NumElems == 8) Shamt %= HalfSize; + Mask |= Elt << (Shamt*Mul); } return Mask; @@ -3409,8 +3345,8 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming /// the two vector operands have swapped position. 
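/// (Editorial illustration: with NumElems = 4, the mask <4,5,0,1> commutes
/// to <0,1,4,5>; indices below NumElems gain NumElems, larger ones lose it,
/// and undef (negative) entries are left untouched.)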
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); +static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, + unsigned NumElems) { for (unsigned i = 0; i != NumElems; ++i) { int idx = Mask[i]; if (idx < 0) @@ -3422,31 +3358,13 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { } } -/// isCommutedVSHUFP() - Return true if swapping operands will -/// allow to use the "vshufpd" or "vshufps" instruction -/// for 256-bit vectors -static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - - unsigned NumElems = VT.getVectorNumElements(); - if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8))) - return false; - - SmallVector<int, 8> CommutedMask; - for (unsigned i = 0; i < NumElems; ++i) - CommutedMask.push_back(Mask[i]); - - CommuteVectorShuffleMask(CommutedMask, VT); - return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget): - isVSHUFPSYMask(CommutedMask, VT, Subtarget); -} - - /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 128-bit -/// SHUFPS and SHUFPD. -static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { - int NumElems = VT.getVectorNumElements(); +/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be +/// the reverse of what x86 shuffles want. +static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool Commuted = false) { + unsigned NumElems = VT.getVectorNumElements(); if (VT.getSizeInBits() != 128) return false; @@ -3454,12 +3372,14 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { if (NumElems != 2 && NumElems != 4) return false; - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) + unsigned Half = NumElems / 2; + unsigned SrcStart = Commuted ? NumElems : 0; + for (unsigned i = 0; i != Half; ++i) + if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems)) return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) + SrcStart = Commuted ? 0 : NumElems; + for (unsigned i = Half; i != NumElems; ++i) + if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems)) return false; return true; @@ -3471,32 +3391,6 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) { SmallVector<int, 8> M; N->getMask(M); return ::isSHUFPMask(M, N->getValueType(0)); } -/// isCommutedSHUFP - Returns true if the shuffle mask is exactly -/// the reverse of what x86 shuffles want. x86 shuffles requires the lower -/// half elements to come from vector 1 (which would equal the dest.) and -/// the upper half to come from vector 2. -static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) { - int NumElems = VT.getVectorNumElements(); - - if (NumElems != 2 && NumElems != 4) - return false; - - int Half = NumElems / 2; - for (int i = 0; i < Half; ++i) - if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2)) - return false; - for (int i = Half; i < NumElems; ++i) - if (!isUndefOrInRange(Mask[i], 0, NumElems)) - return false; - return true; -} - -static bool isCommutedSHUFP(ShuffleVectorSDNode *N) { - SmallVector<int, 8> M; - N->getMask(M); - return isCommutedSHUFPMask(M, N->getValueType(0)); -} - /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { @@ -3765,15 +3659,15 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) { return ::isMOVLMask(M, N->getValueType(0)); } -/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered +/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered /// as permutations between 128-bit chunks or halves. As an example: this /// shuffle below: /// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15> /// The first half comes from the second half of V1 and the second half from /// the second half of V2. -static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256) +static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX) { + if (!HasAVX || VT.getSizeInBits() != 256) return false; // The shuffle result is divided into half A and half B. In total the two @@ -3801,10 +3695,9 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT, return MatchA && MatchB; } -/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VPERM2F128 instructions. -static unsigned getShuffleVPERM2F128Immediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); +/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions. +static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); int HalfSize = VT.getVectorNumElements()/2; @@ -3826,81 +3719,47 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) { return (FstHalf | (SndHalf << 4)); } -/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand +/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to VPERMILPD*. /// Note that VPERMIL mask matching is different depending on whether the underlying /// type is 32 or 64. In the VPERMILPS the high half of the mask should point /// to the same elements of the low, but to the higher half of the source. /// In VPERMILPD the two lanes could be shuffled independently of each other /// with the same restriction that lanes can't be crossed. -static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { +static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX) { int NumElts = VT.getVectorNumElements(); int NumLanes = VT.getSizeInBits()/128; - if (!Subtarget->hasAVX()) + if (!HasAVX) return false; - // Only match 256-bit with 64-bit types - if (VT.getSizeInBits() != 256 || NumElts != 4) + // Only match 256-bit with 32/64-bit types + if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8)) return false; - // The mask on the high lane is independent of the low. Both can match - // any element in inside its own lane, but can't cross.
int LaneSize = NumElts/NumLanes; - for (int l = 0; l < NumLanes; ++l) - for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) { - int LaneStart = l*LaneSize; - if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize)) + for (int l = 0; l != NumLanes; ++l) { + int LaneStart = l*LaneSize; + for (int i = 0; i != LaneSize; ++i) { + if (!isUndefOrInRange(Mask[i+LaneStart], LaneStart, LaneStart+LaneSize)) + return false; + if (NumElts == 4 || l == 0) + continue; + // VPERMILPS handling + if (Mask[i] < 0) + continue; + if (!isUndefOrEqual(Mask[i+LaneStart], Mask[i]+LaneSize)) return false; } - - return true; -} - -/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to VPERMILPS*. -/// Note that VPERMIL mask matching is different depending whether theunderlying -/// type is 32 or 64. In the VPERMILPS the high half of the mask should point -/// to the same elements of the low, but to the higher half of the source. -/// In VPERMILPD the two lanes could be shuffled independently of each other -/// with the same restriction that lanes can't be crossed. -static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT, - const X86Subtarget *Subtarget) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned NumLanes = VT.getSizeInBits()/128; - - if (!Subtarget->hasAVX()) - return false; - - // Only match 256-bit with 32-bit types - if (VT.getSizeInBits() != 256 || NumElts != 8) - return false; - - // The mask on the high lane should be the same as the low. Actually, - // they can differ if any of the corresponding index in a lane is undef - // and the other stays in range. - int LaneSize = NumElts/NumLanes; - for (int i = 0; i < LaneSize; ++i) { - int HighElt = i+LaneSize; - bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts); - bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize); - - if (!HighValid || !LowValid) - return false; - if (Mask[i] < 0 || Mask[HighElt] < 0) - continue; - if (Mask[HighElt]-Mask[i] != LaneSize) - return false; } return true; } -/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VPERMILPS* instructions. -static unsigned getShuffleVPERMILPSImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); +/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_MASK mask with VPERMILPS/D* instructions. +static unsigned getShuffleVPERMILPImmediate(ShuffleVectorSDNode *SVOp) { EVT VT = SVOp->getValueType(0); int NumElts = VT.getVectorNumElements(); @@ -3911,43 +3770,22 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) { // where a mask will match because the same mask element is undef on the // first half but valid on the second. This would get pathological cases // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid. + unsigned Shift = (LaneSize == 4) ? 
2 : 1; unsigned Mask = 0; - for (int l = 0; l < NumLanes; ++l) { - for (int i = 0; i < LaneSize; ++i) { - int MaskElt = SVOp->getMaskElt(i+(l*LaneSize)); - if (MaskElt < 0) - continue; - if (MaskElt >= LaneSize) - MaskElt -= LaneSize; - Mask |= MaskElt << (i*2); - } + for (int i = 0; i != NumElts; ++i) { + int MaskElt = SVOp->getMaskElt(i); + if (MaskElt < 0) + continue; + MaskElt %= LaneSize; + unsigned Shamt = i; + // For VPERMILPSY, the mask of the first half must be equal to the second one + if (NumElts == 8) Shamt %= LaneSize; + Mask |= MaskElt << (Shamt*Shift); } return Mask; } -/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle -/// the specified VECTOR_MASK mask with VPERMILPD* instructions. -static unsigned getShuffleVPERMILPDImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - EVT VT = SVOp->getValueType(0); - - int NumElts = VT.getVectorNumElements(); - int NumLanes = VT.getSizeInBits()/128; - - unsigned Mask = 0; - int LaneSize = NumElts/NumLanes; - for (int l = 0; l < NumLanes; ++l) - for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) { - int MaskElt = SVOp->getMaskElt(i); - if (MaskElt < 0) - continue; - Mask |= (MaskElt-l*LaneSize) << i; - } - - return Mask; -} - /// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse /// of what x86 movss wants. X86 movss requires the lowest element to be the lowest /// element of vector 2 and the other elements to come from vector 1 in order. @@ -4035,21 +3873,18 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 256-bit /// version of MOVDDUP. -static bool isMOVDDUPYMask(ShuffleVectorSDNode *N, - const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); +static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT, + bool HasAVX) { int NumElts = VT.getVectorNumElements(); - bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF; - if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 || - !V2IsUndef || NumElts != 4) + if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4) return false; for (int i = 0; i != NumElts/2; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), 0)) + if (!isUndefOrEqual(Mask[i], 0)) return false; for (int i = NumElts/2; i != NumElts; ++i) - if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2)) + if (!isUndefOrEqual(Mask[i], NumElts/2)) return false; return true; } @@ -4164,14 +3999,13 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) { /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
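/// The immediate is a byte-level rotate amount: the first defined mask
/// element (adjusted for any leading undefs) scaled by the element size.
/// E.g. a v4i32 mask <1, 2, 3, 4> has first defined element 1, giving an
/// immediate of 1*4 == 4 bytes (an illustrative case assuming 32-bit
/// elements).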
-unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - EVT VVT = N->getValueType(0); - unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3; +static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) { + EVT VT = SVOp->getValueType(0); + unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3; int Val = 0; unsigned i, e; - for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) { + for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) { Val = SVOp->getMaskElt(i); if (Val >= 0) break; @@ -4631,29 +4465,14 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::SHUFPS: case X86ISD::SHUFPD: ImmN = N->getOperand(N->getNumOperands()-1); - DecodeSHUFPSMask(NumElems, - cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: - DecodePUNPCKHMask(NumElems, ShuffleMask); - break; - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - DecodeUNPCKHPMask(VT, ShuffleMask); + DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), + ShuffleMask); break; - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - DecodePUNPCKLMask(VT, ShuffleMask); + case X86ISD::UNPCKH: + DecodeUNPCKHMask(VT, ShuffleMask); break; - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - DecodeUNPCKLPMask(VT, ShuffleMask); + case X86ISD::UNPCKL: + DecodeUNPCKLMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: DecodeMOVHLPSMask(NumElems, ShuffleMask); @@ -4686,27 +4505,12 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, Depth+1); } - case X86ISD::VPERMILPS: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::VPERMILPSY: + case X86ISD::VPERMILP: ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(), + DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); break; - case X86ISD::VPERMILPD: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::VPERMILPDY: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::VPERM2F128: + case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), ShuffleMask); @@ -5334,8 +5138,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DAG); } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); - EVT MiddleVT = MVT::v4i32; + unsigned NumBits = VT.getSizeInBits(); + assert((NumBits == 128 || NumBits == 256) && + "Expected an SSE or AVX value type!"); + EVT MiddleVT = NumBits == 128 ? MVT::v4i32 : MVT::v8i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasXMMInt(), DAG); @@ -6256,7 +6062,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // from X. 
if (NumHi == 3) { // Normalize it so the 3 elements come from V1. - CommuteVectorShuffleMask(PermMask, VT); + CommuteVectorShuffleMask(PermMask, 4); std::swap(V1, V2); } @@ -6566,70 +6372,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: return X86ISD::PUNPCKLDQ; - case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v8i32: - if (HasAVX2) return X86ISD::PUNPCKLDQ; - // else use fp unit for int unpack. - case MVT::v8f32: - case MVT::v4f32: return X86ISD::UNPCKLPS; - case MVT::v4i64: - if (HasAVX2) return X86ISD::PUNPCKLQDQ; - // else use fp unit for int unpack. - case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKLPD; - case MVT::v32i8: - case MVT::v16i8: return X86ISD::PUNPCKLBW; - case MVT::v16i16: - case MVT::v8i16: return X86ISD::PUNPCKLWD; - default: - llvm_unreachable("Unknown type for unpckl"); - } - return 0; -} - -static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: return X86ISD::PUNPCKHDQ; - case MVT::v2i64: return X86ISD::PUNPCKHQDQ; - case MVT::v8i32: - if (HasAVX2) return X86ISD::PUNPCKHDQ; - // else use fp unit for int unpack. - case MVT::v8f32: - case MVT::v4f32: return X86ISD::UNPCKHPS; - case MVT::v4i64: - if (HasAVX2) return X86ISD::PUNPCKHQDQ; - // else use fp unit for int unpack. - case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKHPD; - case MVT::v32i8: - case MVT::v16i8: return X86ISD::PUNPCKHBW; - case MVT::v16i16: - case MVT::v8i16: return X86ISD::PUNPCKHWD; - default: - llvm_unreachable("Unknown type for unpckh"); - } - return 0; -} - -static inline unsigned getVPERMILOpcode(EVT VT) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: - case MVT::v4f32: return X86ISD::VPERMILPS; - case MVT::v2i64: - case MVT::v2f64: return X86ISD::VPERMILPD; - case MVT::v8i32: - case MVT::v8f32: return X86ISD::VPERMILPSY; - case MVT::v4i64: - case MVT::v4f64: return X86ISD::VPERMILPDY; - default: - llvm_unreachable("Unknown type for vpermil"); - } - return 0; -} - static SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, @@ -6703,17 +6445,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); - bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; bool V2IsSplat = false; bool HasXMMInt = Subtarget->hasXMMInt(); + bool HasAVX = Subtarget->hasAVX(); bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); + assert(V1.getOpcode() != ISD::UNDEF && "Op 1 of shuffle should not be undef"); + // Vector shuffle lowering takes 3 steps: // // 1) Normalize the input vectors. Here splats, zeroed vectors, profitable @@ -6738,11 +6482,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. 
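// (For reference, an unpckl_undef mask duplicates the low half of a vector
// in place, e.g. <0, 0, 1, 1> for v4i32, while an unpckh_undef mask
// duplicates the high half, e.g. <2, 2, 3, 3>.)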
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() && V2IsUndef && RelaxedMayFoldVectorLoad(V1)) @@ -6754,8 +6496,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Use to match splats if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then @@ -6787,8 +6528,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isMOVLMask(SVOp)) { - if (V1IsUndef) - return V2; if (ISD::isBuildVectorAllZeros(V1.getNode())) return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl); if (!X86::isMOVLPMask(SVOp)) { @@ -6834,17 +6573,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { V2IsSplat = isSplatVector(V2.getNode()); // Canonicalize the splat or undef, if present, to be on the RHS. - if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) { + if (V1IsSplat && !V2IsSplat) { Op = CommuteVectorShuffle(SVOp, DAG); SVOp = cast<ShuffleVectorSDNode>(Op); V1 = SVOp->getOperand(0); V2 = SVOp->getOperand(1); std::swap(V1IsSplat, V2IsSplat); - std::swap(V1IsUndef, V2IsUndef); Commuted = true; } - if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { + SmallVector<int, 32> M; + SVOp->getMask(M); + + if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) { // Shuffling low element of v1 into undef, just return v1. 
if (V2IsUndef) return V1; @@ -6854,13 +6595,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, - DAG); + if (isUNPCKLMask(M, VT, HasAVX2)) + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG); - if (X86::isUNPCKHMask(SVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, - DAG); + if (isUNPCKHMask(M, VT, HasAVX2)) + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -6884,35 +6623,30 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); if (X86::isUNPCKLMask(NewSVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp, HasAVX2)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, - DAG); + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG); } // Normalize the node to match x86 shuffle ops if needed - if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp)) + if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true) || + isVSHUFPYMask(M, VT, HasAVX, /* Commuted */ true))) return CommuteVectorShuffle(SVOp, DAG); // The checks below are all present in isShuffleMaskLegal, but they are // inlined here right now to enable us to directly emit target specific // nodes, and remove one by one until they don't return Op anymore. - SmallVector<int, 16> M; - SVOp->getMask(M); if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX())) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, - X86::getShufflePALIGNRImmediate(SVOp), + getShufflePALIGNRImmediate(SVOp), DAG); if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) - return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); - if (VT == MVT::v2i64) - return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); + if (VT == MVT::v2f64 || VT == MVT::v2i64) + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); } if (isPSHUFHWMask(M, VT)) @@ -6929,12 +6663,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, X86::getShuffleSHUFImmediate(SVOp), DAG); - if (X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); - if (X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, - DAG); + if (isUNPCKL_v_undef_Mask(M, VT)) + return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG); + if (isUNPCKH_v_undef_Mask(M, VT)) + return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG); //===--------------------------------------------------------------------===// // Generate target specific nodes for 128 or 256-bit shuffles only @@ -6942,44 +6674,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // // Handle VMOVDDUPY permutations - if (isMOVDDUPYMask(SVOp, Subtarget)) + if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX)) return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG); - // Handle VPERMILPS* 
permutations - if (isVPERMILPSMask(M, VT, Subtarget)) - return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, - getShuffleVPERMILPSImmediate(SVOp), DAG); - - // Handle VPERMILPD* permutations - if (isVPERMILPDMask(M, VT, Subtarget)) - return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1, - getShuffleVPERMILPDImmediate(SVOp), DAG); + // Handle VPERMILPS/D* permutations + if (isVPERMILPMask(M, VT, HasAVX)) + return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, + getShuffleVPERMILPImmediate(SVOp), DAG); - // Handle VPERM2F128 permutations - if (isVPERM2F128Mask(M, VT, Subtarget)) - return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2, - getShuffleVPERM2F128Immediate(SVOp), DAG); + // Handle VPERM2F128/VPERM2I128 permutations + if (isVPERM2X128Mask(M, VT, HasAVX)) + return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, + V2, getShuffleVPERM2X128Immediate(SVOp), DAG); - // Handle VSHUFPSY permutations - if (isVSHUFPSYMask(M, VT, Subtarget)) + // Handle VSHUFPS/DY permutations + if (isVSHUFPYMask(M, VT, HasAVX)) return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, - getShuffleVSHUFPSYImmediate(SVOp), DAG); - - // Handle VSHUFPDY permutations - if (isVSHUFPDYMask(M, VT, Subtarget)) - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, - getShuffleVSHUFPDYImmediate(SVOp), DAG); - - // Try to swap operands in the node to match x86 shuffle ops - if (isCommutedVSHUFPMask(M, VT, Subtarget)) { - // Now we need to commute operands. - SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG)); - V1 = SVOp->getOperand(0); - V2 = SVOp->getOperand(1); - unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp): - getShuffleVSHUFPSYImmediate(SVOp); - return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG); - } + getShuffleVSHUFPYImmediate(SVOp), DAG); //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, @@ -7888,7 +7599,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, LLVMContext *Context = DAG.getContext(); // Build some magic constants. 
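// (0x45300000 and 0x43300000 below are the high words of the doubles 2^84
// and 2^52. The standard uint64->f64 trick interleaves the high and low
// 32-bit halves of the input under those exponent words, yielding
// 2^84 + hi*2^32 and 2^52 + lo, then subtracts the CV1 constants so the two
// doubles sum to exactly hi*2^32 + lo.)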
- std::vector<Constant*> CV0; + SmallVector<Constant*,4> CV0; CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000))); CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000))); CV0.push_back(ConstantInt::get(*Context, APInt(32, 0))); @@ -7896,7 +7607,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, Constant *C0 = ConstantVector::get(CV0); SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); - std::vector<Constant*> CV1; + SmallVector<Constant*,2> CV1; CV1.push_back( ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); CV1.push_back( @@ -8176,17 +7887,13 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, EVT EltVT = VT; if (VT.isVector()) EltVT = VT.getVectorElementType(); - std::vector<Constant*> CV; + SmallVector<Constant*,4> CV; if (EltVT == MVT::f64) { Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); - CV.push_back(C); - CV.push_back(C); + CV.assign(2, C); } else { Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); + CV.assign(4, C); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8201,19 +7908,18 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT EltVT = VT; - if (VT.isVector()) + unsigned NumElts = VT == MVT::f64 ? 2 : 4; + if (VT.isVector()) { EltVT = VT.getVectorElementType(); - std::vector<Constant*> CV; + NumElts = VT.getVectorNumElements(); + } + SmallVector<Constant*,8> CV; if (EltVT == MVT::f64) { Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); - CV.push_back(C); - CV.push_back(C); + CV.assign(NumElts, C); } else { Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); - CV.push_back(C); + CV.assign(NumElts, C); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -8221,11 +7927,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo::getConstantPool(), false, false, false, 16); if (VT.isVector()) { + MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64; return DAG.getNode(ISD::BITCAST, dl, VT, - DAG.getNode(ISD::XOR, dl, MVT::v2i64, - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, + DAG.getNode(ISD::XOR, dl, XORVT, + DAG.getNode(ISD::BITCAST, dl, XORVT, Op.getOperand(0)), - DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask))); + DAG.getNode(ISD::BITCAST, dl, XORVT, Mask))); } else { return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask); } @@ -8254,7 +7961,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // type, and that won't be f80 since that is not custom lowered. // First get the sign bit of second operand. 
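// (The constants built here are just per-element sign-bit masks, 2^63 for
// f64 and 2^31 for f32: AND-ing SRC2 with the mask isolates its sign, which
// is later OR-ed into SRC1 with its own sign bit cleared, the usual bitwise
// copysign expansion.)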
- std::vector<Constant*> CV; + SmallVector<Constant*,4> CV; if (SrcVT == MVT::f64) { CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)))); CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); @@ -9253,7 +8960,7 @@ SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() || - EnableSegmentedStacks) && + getTargetMachine().Options.EnableSegmentedStacks) && "This should be used only on Windows targets or when segmented stacks " "are being used"); assert(!Subtarget->isTargetEnvMacho() && "Not implemented"); @@ -9267,7 +8974,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, bool Is64Bit = Subtarget->is64Bit(); EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; - if (EnableSegmentedStacks) { + if (getTargetMachine().Options.EnableSegmentedStacks) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -9403,7 +9110,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. - assert(!UseSoftFloat && + assert(!getTargetMachine().Options.UseSoftFloat && !(DAG.getMachineFunction() .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && Subtarget->hasXMM()); @@ -10472,7 +10179,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(4, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); @@ -10487,13 +10194,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M, DAG.getConstant(2, MVT::i32)); - R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M); + R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R); // a += a Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op); // return pblendv(r, r+r, a); R = DAG.getNode(ISD::VSELECT, dl, VT, Op, - R, DAG.getNode(ISD::ADD, dl, VT, R, R)); + DAG.getNode(ISD::ADD, dl, VT, R, R), R); return R; } @@ -11194,6 +10901,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::HADD: return "X86ISD::HADD"; + case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; @@ -11266,24 +10975,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD"; case X86ISD::MOVSD: return "X86ISD::MOVSD"; case X86ISD::MOVSS: return "X86ISD::MOVSS"; - case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; - case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; - case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; - case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; - case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; - case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; - case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; - case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW"; - case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; - case 
X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; - case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; + case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; - case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; - case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; - case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD"; - case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY"; - case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; + case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; + case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -11391,7 +11087,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()); + return false; // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || @@ -11419,7 +11115,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, return (isMOVLMask(Mask, VT) || isCommutedMOVLMask(Mask, VT, true) || isSHUFPMask(Mask, VT) || - isCommutedSHUFPMask(Mask, VT)); + isSHUFPMask(Mask, VT, /* Commuted */ true)); } return false; } @@ -12289,7 +11985,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - assert(EnableSegmentedStacks); + assert(getTargetMachine().Options.EnableSegmentedStacks); unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; unsigned TlsOffset = Is64Bit ? 0x70 : 0x30; @@ -13169,7 +12865,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; std::swap(LHS, RHS); @@ -13179,7 +12875,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, case ISD::SETOLE: // Converting this to a min would handle comparisons between positive // and negative zero incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMIN; @@ -13197,7 +12893,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, case ISD::SETOGE: // Converting this to a max would handle comparisons between positive // and negative zero incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) break; Opcode = X86ISD::FMAX; @@ -13207,7 +12903,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // the operands would cause it to handle comparisons between positive // and negative zero incorrectly. 
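// (Recall that minps/maxps return the second source operand when the two
// inputs compare equal or unordered, so both the +0.0/-0.0 pair and NaNs
// make the operand order observable; that is why these rewrites are gated
// on UnsafeFPMath or on proving that a zero or NaN cannot occur.)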
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; std::swap(LHS, RHS); @@ -13233,7 +12929,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would // cause it to handle NaNs incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; @@ -13243,7 +12939,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETUGT: // Converting this to a min would handle NaNs incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; Opcode = X86ISD::FMIN; @@ -13268,7 +12964,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle comparisons between positive // and negative zero incorrectly, and swapping the operands would // cause it to handle NaNs incorrectly. - if (!UnsafeFPMath && + if (!DAG.getTarget().Options.UnsafeFPMath && !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) break; @@ -14048,7 +13744,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X); Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y); Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask); - Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y); + Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } } @@ -14232,7 +13928,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // If we are saving a concatination of two XMM registers, perform two stores. + // If we are saving a concatenation of two XMM registers, perform two stores. // This is better in Sandy Bridge cause one 256-bit mem op is done via two // 128-bit ones. If in the future the cost becomes only one memory access the // first version would be better. @@ -14342,7 +14038,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); - bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps + bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasXMMInt(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && @@ -14458,7 +14154,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, /// set to A, RHS to B, and the routine returns 'true'. /// Note that the binary operation should have the property that if one of the /// operands is UNDEF then the result is UNDEF. 
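/// With AVX the 256-bit horizontal ops still act on 128-bit lanes
/// independently: e.g. vhaddps on <a0..a7> and <b0..b7> produces
/// < a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7 >, which is the
/// layout the lane arithmetic below checks for.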
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { +static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) { // Look for the following pattern: if // A = < float a0, float a1, float a2, float a3 > // B = < float b0, float b1, float b2, float b3 > @@ -14474,7 +14170,18 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { return false; EVT VT = LHS.getValueType(); - unsigned N = VT.getVectorNumElements(); + + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Unsupported vector type for horizontal add/sub"); + + // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to + // operate independently on 128-bit lanes. + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VT.getSizeInBits()/128; + unsigned NumLaneElts = NumElts / NumLanes; + assert((NumLaneElts % 2 == 0) && + "Vector type should have an even number of elements in each lane"); + unsigned HalfLaneElts = NumLaneElts/2; // View LHS in the form // LHS = VECTOR_SHUFFLE A, B, LMask @@ -14483,7 +14190,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { // NOTE: in what follows a default initialized SDValue represents an UNDEF of // type VT. SDValue A, B; - SmallVector<int, 8> LMask(N); + SmallVector<int, 16> LMask(NumElts); if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (LHS.getOperand(0).getOpcode() != ISD::UNDEF) A = LHS.getOperand(0); @@ -14493,14 +14200,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { } else { if (LHS.getOpcode() != ISD::UNDEF) A = LHS; - for (unsigned i = 0; i != N; ++i) + for (unsigned i = 0; i != NumElts; ++i) LMask[i] = i; } // Likewise, view RHS in the form // RHS = VECTOR_SHUFFLE C, D, RMask SDValue C, D; - SmallVector<int, 8> RMask(N); + SmallVector<int, 16> RMask(NumElts); if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) { if (RHS.getOperand(0).getOpcode() != ISD::UNDEF) C = RHS.getOperand(0); @@ -14510,7 +14217,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { } else { if (RHS.getOpcode() != ISD::UNDEF) C = RHS; - for (unsigned i = 0; i != N; ++i) + for (unsigned i = 0; i != NumElts; ++i) RMask[i] = i; } @@ -14525,30 +14232,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) { // If A and B occur in reverse order in RHS, then "swap" them (which means // rewriting the mask). if (A != C) - for (unsigned i = 0; i != N; ++i) { - unsigned Idx = RMask[i]; - if (Idx < N) - RMask[i] += N; - else if (Idx < 2*N) - RMask[i] -= N; - } + CommuteVectorShuffleMask(RMask, NumElts); // At this point LHS and RHS are equivalent to // LHS = VECTOR_SHUFFLE A, B, LMask // RHS = VECTOR_SHUFFLE A, B, RMask // Check that the masks correspond to performing a horizontal operation. - for (unsigned i = 0; i != N; ++i) { - unsigned LIdx = LMask[i], RIdx = RMask[i]; + for (unsigned i = 0; i != NumElts; ++i) { + int LIdx = LMask[i], RIdx = RMask[i]; // Ignore any UNDEF components. - if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N)) - || (!B.getNode() && (LIdx >= N || RIdx >= N))) + if (LIdx < 0 || RIdx < 0 || + (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) || + (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts))) continue; // Check that successive elements are being operated on. If not, this is // not a horizontal operation. 
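// (Worked example for v8f32: i == 5 gives Src == 0 and LaneStart == 4, so
// Index == 2*(5%2) + 8*0 + 4 == 6; the expected pair is (6, 7), i.e. result
// element 5 must be a6+a7, matching the vhaddps layout noted above.)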
- if (!(LIdx == 2*i && RIdx == 2*i + 1) && - !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i)) + unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs + unsigned LaneStart = (i/NumLaneElts) * NumLaneElts; + int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart; + if (!(LIdx == Index && RIdx == Index + 1) && + !(IsCommutative && LIdx == Index + 1 && RIdx == Index)) return false; } @@ -14565,7 +14270,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14579,7 +14285,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. - if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && isHorizontalBinOp(LHS, RHS, false)) return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14783,7 +14490,8 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG, SDValue Op1 = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && + if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || + (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && isHorizontalBinOp(Op0, Op1, true)) return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1); @@ -14815,8 +14523,9 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG, // Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0); - if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && - isHorizontalBinOp(Op0, Op1, false)) + if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) || + (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) && + isHorizontalBinOp(Op0, Op1, false)) return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1); return OptimizeConditionalInDecrement(N, DAG); } @@ -14857,18 +14566,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::SHUFPS: // Handle all target specific shuffles case X86ISD::SHUFPD: case X86ISD::PALIGN: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: + case X86ISD::UNPCKH: + case X86ISD::UNPCKL: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: @@ -14876,11 +14575,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::VPERMILPS: - case X86ISD::VPERMILPSY: - case X86ISD::VPERMILPD: - case X86ISD::VPERMILPDY: - case X86ISD::VPERM2F128: + case X86ISD::VPERMILP: + case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ccff3a5..cfc1f88 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -273,23 +273,10 @@ namespace llvm { MOVLPD, MOVSD, MOVSS, - UNPCKLPS, - UNPCKLPD, - UNPCKHPS, - UNPCKHPD, - PUNPCKLBW, - PUNPCKLWD, - PUNPCKLDQ, - PUNPCKLQDQ, - PUNPCKHBW, - PUNPCKHWD, - PUNPCKHDQ, - PUNPCKHQDQ, - VPERMILPS, - VPERMILPSY, - VPERMILPD, - VPERMILPDY, - VPERM2F128, + UNPCKL, + UNPCKH, + VPERMILP, + VPERM2X128, VBROADCAST, // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, @@ -468,10 +455,6 @@ namespace llvm { /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction. unsigned getShufflePSHUFLWImmediate(SDNode *N); - /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle - /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction. - unsigned getShufflePALIGNRImmediate(SDNode *N); - /// getExtractVEXTRACTF128Immediate - Return the appropriate /// immediate to extract the specified EXTRACT_SUBVECTOR index /// with VEXTRACTF128 instructions.
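As a side note on the VPERM2X128 immediate computed above: each half of the destination selects one of the four 128-bit source halves, encoded in bits 0-1 and 4-5 of the immediate. The following standalone sketch (hypothetical helper names, not part of this patch) models how getShuffleVPERM2X128Immediate derives that encoding from a mask, assuming an all-undef half simply falls back to half 0:

#include <cassert>
#include <cstdio>
#include <vector>

// Standalone model of the VPERM2X128 immediate: Mask holds 2*HalfSize
// shuffle indices over the concatenation of two 256-bit vectors, and each
// destination half is encoded as (first defined index in that half)/HalfSize.
static unsigned vperm2x128Immediate(const std::vector<int> &Mask,
                                    unsigned HalfSize) {
  assert(Mask.size() == 2 * HalfSize && "expected a full 256-bit mask");
  auto HalfSel = [&](unsigned Base) -> unsigned {
    for (unsigned i = 0; i != HalfSize; ++i)
      if (Mask[Base + i] >= 0)             // skip undef (negative) entries
        return Mask[Base + i] / HalfSize;  // which of the four source halves
    return 0;                              // all-undef half: any choice works
  };
  return HalfSel(0) | (HalfSel(HalfSize) << 4);
}

int main() {
  // The mask from the isVPERM2X128Mask comment earlier in this patch.
  std::vector<int> M = {4, 5, 6, 7, 12, 13, 14, 15};
  std::printf("imm = 0x%x\n", vperm2x128Immediate(M, 4)); // imm = 0x31
  return 0;
}

Running it on <4, 5, 6, 7, 12, 13, 14, 15> prints 0x31, i.e. take the high half of V1 and the high half of V2, which matches that comment's description of the shuffle.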
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index d868773..f443088 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -58,3 +58,391 @@ let isAsmParserOnly = 1 in { defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">; defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W; } + +//===----------------------------------------------------------------------===// +// FMA4 - AMD 4 operand Fused Multiply-Add instructions +//===----------------------------------------------------------------------===// + + +multiclass fma4s<bits<8> opc, string OpcodeStr> { + def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>; + +} + +multiclass fma4p<bits<8> opc, string OpcodeStr> { + def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>; + def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, XOP_W; + def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>; +} + +let isAsmParserOnly = 1 in { + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss">; + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">; + defm VFMADDPS4 : fma4p<0x68, "vfmaddps">; + defm VFMADDPD4 : fma4p<0x69, "vfmaddpd">; + defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss">; + defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd">; + defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps">; + defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd">; + defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss">; + defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd">; + defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps">; + defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd">; + defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss">; + defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd">; + defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps">; + defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd">; + defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">; + defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">; + defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">; + defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">; +} + +// FMA4 Intrinsics patterns + +// VFMADD +def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3), + 
(VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFMSUB +def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBPS4rr VR128:$src1, VR128:$src2, 
VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFNMADD +def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMADDPS4rrY VR256:$src1, VR256:$src2, 
VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFNMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFNMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFNMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFNMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFNMSUB +def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFNMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFNMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFNMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFNMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFNMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFNMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFNMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFNMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFNMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFNMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFMADDSUB +def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, 
VR128:$src3), + (VFMADDSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMADDSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMADDSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMADDSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMADDSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMADDSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMADDSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMADDSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMADDSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMADDSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + VR256:$src3), + (VFMADDSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +// VFMSUBADD +def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, + (alignedloadv4f32 addr:$src3)), + (VFMSUBADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2), + VR128:$src3), + (VFMSUBADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, VR128:$src3), + (VFMSUBADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, + (alignedloadv2f64 addr:$src3)), + (VFMSUBADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2), + VR128:$src3), + (VFMSUBADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>; + +def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, + (alignedloadv8f32 addr:$src3)), + (VFMSUBADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, + (alignedloadv8f32 addr:$src2), + VR256:$src3), + (VFMSUBADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>; + +def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3), + (VFMSUBADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, + (alignedloadv4f64 addr:$src3)), + (VFMSUBADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>; +def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, + (alignedloadv4f64 addr:$src2), + 
VR256:$src3), + (VFMSUBADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index ecd6a93..7ba3639 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -110,6 +110,8 @@ class A7 { bits<5> Prefix = 16; } class T8XD { bits<5> Prefix = 17; } class T8XS { bits<5> Prefix = 18; } class TAXD { bits<5> Prefix = 19; } +class XOP8 { bits<5> Prefix = 20; } +class XOP9 { bits<5> Prefix = 21; } class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } @@ -118,7 +120,8 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class VEX_L { bit hasVEX_L = 1; } class VEX_LIG { bit ignoresVEX_L = 1; } class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } - +class XOP_W { bit hasXOP_WPrefix = 1; } +class XOP { bit hasXOP_Prefix = 1; } class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> : Instruction { @@ -158,6 +161,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, bit hasVEX_L = 0; // Does this inst use large (256-bit) registers? bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? + bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands + bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix? // TSFlags layout should be kept in sync with X86InstrInfo.h. let TSFlags{5-0} = FormBits; @@ -179,6 +184,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{38} = hasVEX_L; let TSFlags{39} = ignoresVEX_L; let TSFlags{40} = has3DNow0F0FOpcode; + let TSFlags{41} = hasXOP_WPrefix; + let TSFlags{42} = hasXOP_Prefix; } class PseudoI<dag oops, dag iops, list<dag> pattern> @@ -332,6 +339,10 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB, Requires<[HasAVX]>; +class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, + Requires<[HasXMM]>; // SSE2 Instruction Templates: // @@ -496,6 +507,30 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize, VEX_4V, Requires<[HasFMA3]>; +// FMA4 Instruction Templates +class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; + +// XOP 2, 3 and 4 Operand Instruction Template +class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + XOP, XOP9, Requires<[HasXOP]>; + +// XOP 2, 3 and 4 Operand Instruction Templates with imm byte +class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, + XOP, XOP8, Requires<[HasXOP]>; + +// XOP 5 operand instruction (VEX encoding!) +class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag>pattern> + : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, + OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; + // X86-64 Instruction templates... 
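The two XOP bits added to TSFlags above sit at positions 41 and 42, right after has3DNow0F0FOpcode, and the .td comment warns that the layout must stay in sync with X86InstrInfo.h. A minimal sketch of the matching C++ accessors, assuming the usual X86II mask-and-shift style; the helper names are illustrative and not part of this patch:

  #include <cstdint>

  // Hypothetical accessors mirroring TSFlags{41} and TSFlags{42} above.
  static inline bool hasXOPWPrefix(uint64_t TSFlags) {
    // hasXOP_WPrefix: occupies VEX_W's encoding slot, reused to swap operands.
    return (TSFlags >> 41) & 1;
  }
  static inline bool hasXOPPrefix(uint64_t TSFlags) {
    // hasXOP_Prefix: the instruction requires the XOP escape prefix.
    return (TSFlags >> 42) & 1;
  }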
// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 791bbe6..cd13bc4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -130,28 +130,12 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; -def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; +def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; -def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; -def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; +def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; -def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; -def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; -def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; -def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; - -def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; -def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; -def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; -def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; - -def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; -def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; -def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>; -def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>; - -def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>; +def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; @@ -363,12 +347,6 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); }]>; -// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to -// a PALIGNR imm. -def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePALIGNRImmediate(N)); -}]>; - // EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index // to VEXTRACTF128 imm. def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{ diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 24c4a53..7d1b9a1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1528,9 +1528,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); // Build and insert into an implicit UNDEF value. This is OK because // well be shifting and then extracting the lower 16-bits. - BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); + BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2); InsMI2 = - BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) + BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) .addReg(leaInReg2, RegState::Define, X86::sub_16bit) .addReg(Src2, getKillRegState(isKill2)); addRegReg(MIB, leaInReg, true, leaInReg2, true); @@ -2040,13 +2040,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { } bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isTerminator()) return false; + if (!MI->isTerminator()) return false; // Conditional branch is a special case. 
- if (MCID.isBranch() && !MCID.isBarrier()) + if (MI->isBranch() && !MI->isBarrier()) return true; - if (!MCID.isPredicable()) + if (!MI->isPredicable()) return true; return !isPredicated(MI); } @@ -2072,7 +2071,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // A terminator that isn't a branch can't easily be handled by this // analysis. - if (!I->getDesc().isBranch()) + if (!I->isBranch()) return true; // Handle unconditional branches. @@ -2556,6 +2555,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); switch (MI->getOpcode()) { case X86::V_SET0: + case X86::FsFLD0SS: + case X86::FsFLD0SD: return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); @@ -2771,7 +2772,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) { case X86::RCPSSr: case X86::RCPSSr_Int: case X86::ROUNDSDr: + case X86::ROUNDSDr_Int: case X86::ROUNDSSr: + case X86::ROUNDSSr_Int: case X86::RSQRTSSr: case X86::RSQRTSSr_Int: case X86::SQRTSSr: @@ -2783,7 +2786,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) { case X86::Int_VCVTSS2SDrr: case X86::VRCPSSr: case X86::VROUNDSDr: + case X86::VROUNDSDr_Int: case X86::VROUNDSSr: + case X86::VROUNDSSr_Int: case X86::VRSQRTSSr: case X86::VSQRTSSr: return true; @@ -2911,11 +2916,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = 16; break; case X86::FsFLD0SD: - case X86::VFsFLD0SD: Alignment = 8; break; case X86::FsFLD0SS: - case X86::VFsFLD0SS: Alignment = 4; break; default: @@ -2950,9 +2953,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SETALLONES: case X86::AVX2_SETALLONES: case X86::FsFLD0SD: - case X86::FsFLD0SS: - case X86::VFsFLD0SD: - case X86::VFsFLD0SS: { + case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. 
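One reason the FsFLD0SS and FsFLD0SD cases can share the single (V)XORPSrr expansion above is that IEEE-754 +0.0 is the all-zero bit pattern at both widths, so zeroing the register is correct for f32 and f64 alike. A tiny standalone check of that fact (a sketch, not from the patch):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    float f = 0.0f; double d = 0.0;
    uint32_t fb; uint64_t db;
    std::memcpy(&fb, &f, sizeof fb);   // inspect the raw encodings
    std::memcpy(&db, &d, sizeof db);
    assert(fb == 0 && db == 0);        // +0.0 is all-zero bits in f32 and f64
    return 0;
  }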
@@ -2978,9 +2979,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineConstantPool &MCP = *MF.getConstantPool(); Type *Ty; unsigned Opc = LoadMI->getOpcode(); - if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS) + if (Opc == X86::FsFLD0SS) Ty = Type::getFloatTy(MF.getFunction()->getContext()); - else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD) + else if (Opc == X86::FsFLD0SD) Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY) Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8); @@ -3569,7 +3570,13 @@ static const unsigned ReplaceableInstrsAVX2[][3] = { { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, - { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr } + { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, + { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, + { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, + { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, + { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm }, + { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr } }; // FIXME: Some shuffle and unpack instructions have equivalents in different diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 35631d5..0bc3afa 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -473,6 +473,7 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; +def HasXMM : Predicate<"Subtarget->hasXMM()">; def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; @@ -480,6 +481,7 @@ def HasAES : Predicate<"Subtarget->hasAES()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; +def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; @@ -1502,6 +1504,9 @@ include "X86InstrFragmentsSIMD.td" // FMA - Fused Multiply-Add support (requires FMA) include "X86InstrFMA.td" +// XOP +include "X86InstrXOP.td" + // SSE, MMX and 3DNow! vector support. include "X86InstrSSE.td" include "X86InstrMMX.td" diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7cadac1..345f606 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -240,21 +240,13 @@ let Predicates = [HasAVX] in { } // Alias instructions that map fld0 to pxor for sse. -// FIXME: Set encoding to pseudo! 
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in { - def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; - def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasSSE2]>, TB, OpSize; - def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; - def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasAVX]>, TB, OpSize, VEX_4V; +// This is expanded by ExpandPostRAPseudos. +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isPseudo = 1 in { + def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>; + def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>; } //===----------------------------------------------------------------------===// @@ -569,6 +561,16 @@ let Predicates = [HasAVX] in { (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + + // Move low f32 and clear high bits. + def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>; + def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>; } let AddedComplexity = 20 in { @@ -596,6 +598,9 @@ let Predicates = [HasAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types + def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, + (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; @@ -613,6 +618,15 @@ let Predicates = [HasAVX] in { (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), sub_xmm)>; + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + + // Move low f64 and clear high bits. + def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), + (SUBREG_TO_REG (i32 0), + (VMOVSDrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>; // Extract and store. 
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), @@ -634,6 +648,16 @@ let Predicates = [HasAVX] in { (VMOVSSrr (v4f32 VR128:$src1), (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + // 256-bit variants + def : Pat<(v8i32 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss), + (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>; + def : Pat<(v8f32 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss), + (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>; + // Shuffle with VMOVSD def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), (VMOVSDrr VR128:$src1, FR64:$src2)>; @@ -650,6 +674,17 @@ let Predicates = [HasAVX] in { (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>; + // 256-bit variants + def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd), + (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>; + def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), + (SUBREG_TO_REG (i32 0), + (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd), + (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>; + + // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the @@ -657,6 +692,9 @@ let Predicates = [HasAVX] in { def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2), + sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; @@ -761,6 +799,22 @@ let isCodeGenOnly = 1 in { "movupd\t{$src, $dst|$dst, $src}", []>, VEX; } +let Predicates = [HasAVX] in { +def : Pat<(v8i32 (X86vzmovl + (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +def : Pat<(v4i64 (X86vzmovl + (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +def : Pat<(v8f32 (X86vzmovl + (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +def : Pat<(v4f64 (X86vzmovl + (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))), + (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; +} + + def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; @@ -1156,14 +1210,17 @@ let Predicates = [HasAVX] in { (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (VMOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (VMOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), (VMOVHPSrm VR128:$src1, addr:$src2)>; - // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses 
through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (VMOVHPDrm VR128:$src1, addr:$src2)>; @@ -1174,10 +1231,10 @@ let Predicates = [HasAVX] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPSmr addr:$dst, VR128:$src)>; def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPDmr addr:$dst, VR128:$src)>; } @@ -1189,21 +1246,24 @@ let Predicates = [HasSSE1] in { (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVHPSrm VR128:$src1, addr:$src2)>; def : Pat<(X86Movlhps VR128:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), (MOVHPSrm VR128:$src1, addr:$src2)>; // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst), (MOVHPSmr addr:$dst, VR128:$src)>; } let Predicates = [HasSSE2] in { - // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; @@ -1214,7 +1274,7 @@ let Predicates = [HasSSE2] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; } @@ -1943,7 +2003,7 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // whenever possible to avoid declaring two versions of each one. 
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), (VCVTDQ2PSYrr VR256:$src)>; -def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)), +def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), @@ -2430,27 +2490,27 @@ let AddedComplexity = 10 in { } // AddedComplexity let Predicates = [HasSSE1] in { - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), (UNPCKHPSrr VR128:$src1, VR128:$src2)>; } let Predicates = [HasSSE2] in { - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), (UNPCKHPDrr VR128:$src1, VR128:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. 
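With the per-type unpack nodes collapsed into the single X86Unpckl/X86Unpckh SDNodes, the element type now rides on the node's value type, and the Pat<> lines above route each VT to the concrete UNPCKLPS/UNPCKLPD/PUNPCK* instruction. A minimal sketch of what a caller on the lowering side looks like under that scheme, assuming LLVM's SelectionDAG headers; the function name is illustrative:

  // Requires llvm/CodeGen/SelectionDAG.h and the X86ISD opcodes.
  static SDValue getUnpackLow(SelectionDAG &DAG, DebugLoc dl, EVT VT,
                              SDValue V1, SDValue V2) {
    // One opcode for every element width; VT (v4f32, v2f64, v16i8, ...)
    // selects the right instruction during instruction selection.
    return DAG.getNode(X86ISD::UNPCKL, dl, VT, V1, V2);
  }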
@@ -2463,59 +2523,43 @@ let Predicates = [HasSSE2] in { } let Predicates = [HasAVX] in { - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)), (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)), - (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), - (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))), - (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)), - (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))), - (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)), - (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))), - (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)), - (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, 
addr:$src2)>; - def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. @@ -2869,7 +2913,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> { !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins ssmem:$src1, VR128:$src2), + (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; } @@ -3198,13 +3242,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), //===----------------------------------------------------------------------===// // Prefetch intrinsic. -def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), +def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>; -def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), +def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src), "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>; -def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), +def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src), "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>; -def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), +def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; // Flush cache @@ -3652,6 +3696,8 @@ defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3666,17 +3712,6 @@ let ExeDomain = SSEPackedInt in { VEX_4V; // PSRADQri doesn't exist in SSE[1-3]. } - def VPANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V; - - def VPANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (X86andnp VR128:$src1, - (memopv2i64 addr:$src2)))]>, VEX_4V; } } @@ -3714,6 +3749,8 @@ defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3728,17 +3765,6 @@ let ExeDomain = SSEPackedInt in { VEX_4V; // PSRADQYri doesn't exist in SSE[1-3]. 
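The VPANDN/VPANDNY defm lines above (and the SSE2 PANDN one that follows) fold the and-not forms into PDI_binop_rm via X86andnp, passing 0 for the commutable flag because andnp complements its first operand: the result is ~src1 & src2, not src1 & ~src2. A scalar model of the semantics (a sketch, not from the patch):

  #include <cstdint>

  static inline uint64_t pandn(uint64_t a, uint64_t b) {
    return ~a & b;   // pandn complements the destination operand, then ANDs
  }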
} - def VPANDNYrr : PDI<0xDF, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, - (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V; - - def VPANDNYrm : PDI<0xDF, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (X86andnp VR256:$src1, - (memopv4i64 addr:$src2)))]>, VEX_4V; } } @@ -3776,6 +3802,8 @@ defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, i128mem, 1>; defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, i128mem, 1>; +defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64, + i128mem, 0>; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3787,14 +3815,6 @@ let ExeDomain = SSEPackedInt in { (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), "psrldq\t{$src2, $dst|$dst, $src2}", []>; // PSRADQri doesn't exist in SSE[1-3]. - def PANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; - - let mayLoad = 1 in - def PANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; } } } // Constraints = "$src1 = $dst" @@ -4198,66 +4218,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, } let Predicates = [HasAVX] in { - defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, bc_v2i64, 0>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, bc_v32i8>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, bc_v16i16>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, bc_v8i32>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq, + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, bc_v4i64>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, 
X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, bc_v32i8>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, bc_v16i16>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, bc_v8i32>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq, + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, bc_v4i32>; - defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, bc_v2i64>; - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, bc_v4i32>; - defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, bc_v2i64>; } } // ExeDomain = SSEPackedInt +// Patterns for using AVX1 instructions with integer vectors +// Here to give AVX2 priority +let Predicates = [HasAVX] in { + def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), + (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)), + (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; +} + // Splat v2f64 / v2i64 let AddedComplexity = 10 in { def : Pat<(splat_lo (v2i64 VR128:$src), (undef)), @@ -4784,7 +4826,7 @@ def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // AVX 256-bit register conversion intrinsics def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)), +def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), (VCVTDQ2PDYrm addr:$src)>; def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), @@ -4794,7 +4836,7 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), def : Pat<(v4f64 
(sint_to_fp (v4i32 VR128:$src))), (VCVTDQ2PDYrr VR128:$src)>; -def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))), +def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), (VCVTDQ2PDYrm addr:$src)>; //===---------------------------------------------------------------------===// @@ -5085,7 +5127,7 @@ let Constraints = "$src1 = $dst" in { /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag128, Intrinsic IntId128> { + Intrinsic IntId128> { def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -5097,12 +5139,12 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (mem_frag128 addr:$src))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src))))]>, OpSize; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, - PatFrag mem_frag256, Intrinsic IntId256> { + Intrinsic IntId256> { def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -5114,32 +5156,32 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 - (bitconvert (mem_frag256 addr:$src))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize; } let Predicates = [HasAVX] in { - defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, + defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>, VEX; - defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16, + defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", int_x86_ssse3_pabs_w_128>, VEX; - defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32, + defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>, VEX; } let Predicates = [HasAVX2] in { - defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8, + defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", int_x86_avx2_pabs_b>, VEX; - defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16, + defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", int_x86_avx2_pabs_w>, VEX; - defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32, + defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>, VEX; } -defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, +defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", int_x86_ssse3_pabs_b_128>; -defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, +defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", int_x86_ssse3_pabs_w_128>; -defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, +defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>; //===---------------------------------------------------------------------===// @@ -5148,8 +5190,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32, /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, - PatFrag mem_frag128, Intrinsic IntId128, - bit Is2Addr = 1> { + Intrinsic IntId128, bit Is2Addr = 1> { let isCommutable = 1 in def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -5165,11 +5206,11 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (mem_frag128 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, - PatFrag mem_frag256, Intrinsic IntId256> { + Intrinsic IntId256> { let isCommutable = 1 in def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), @@ -5181,94 +5222,94 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntId256 VR256:$src1, - (bitconvert (mem_frag256 addr:$src2))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; } let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, + defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", int_x86_ssse3_phadd_w_128, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32, + defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", int_x86_ssse3_phadd_d_128, 0>, VEX_4V; - defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16, + defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16, + defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", int_x86_ssse3_phsub_w_128, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32, + defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", int_x86_ssse3_phsub_d_128, 0>, VEX_4V; - defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16, + defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; - defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8, + defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8, + defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8, + defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", int_x86_ssse3_psign_b_128, 0>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16, + defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", int_x86_ssse3_psign_w_128, 0>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32, + defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", int_x86_ssse3_psign_d_128, 0>, VEX_4V; } -defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, +defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16, + defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", int_x86_avx2_phadd_w>, VEX_4V; - defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32, + defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", int_x86_avx2_phadd_d>, VEX_4V; - defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16, + defm VPHADDSW : 
SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16, + defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", int_x86_avx2_phsub_w>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32, + defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", int_x86_avx2_phsub_d>, VEX_4V; - defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16, + defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw>, VEX_4V; - defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8, + defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", int_x86_avx2_pmadd_ub_sw>, VEX_4V; - defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8, + defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", int_x86_avx2_pshuf_b>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8, + defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16, + defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32, + defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d>, VEX_4V; } -defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16, +defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", int_x86_avx2_pmul_hr_sw>, VEX_4V; } // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16, + defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", int_x86_ssse3_phadd_w_128>; - defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32, + defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", int_x86_ssse3_phadd_d_128>; - defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16, + defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128>; - defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16, + defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", int_x86_ssse3_phsub_w_128>; - defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32, + defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", int_x86_ssse3_phsub_d_128>; - defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16, + defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", int_x86_ssse3_phsub_sw_128>; - defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8, + defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", int_x86_ssse3_pmadd_ub_sw_128>; - defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8, + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", int_x86_ssse3_pshuf_b_128>; - defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8, + defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128>; - defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16, + defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128>; - defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32, + defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128>; } -defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16, +defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw_128>; } @@ -6017,8 +6058,18 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, Intrinsic F32Int, Intrinsic F64Int, bit Is2Addr = 1> { let ExeDomain = GenericDomain in { - // Intrinsic operation, reg. + // Operation, reg. 
def SSr : SS4AIi8<opcss, MRMSrcReg, + (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; + + // Intrinsic operation, reg. + def SSr_Int : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -6040,8 +6091,18 @@ let ExeDomain = GenericDomain in { (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, OpSize; - // Intrinsic operation, reg. + // Operation, reg. def SDr : SS4AIi8<opcsd, MRMSrcReg, + (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; + + // Intrinsic operation, reg. + def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), !if(Is2Addr, !strconcat(OpcodeStr, @@ -6079,6 +6140,27 @@ let Predicates = [HasAVX] in { defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; + + def : Pat<(ffloor FR32:$src), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + def : Pat<(f64 (ffloor FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + def : Pat<(f32 (fnearbyint FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; + def : Pat<(f64 (fnearbyint FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; + def : Pat<(f32 (fceil FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + def : Pat<(f64 (fceil FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + def : Pat<(f32 (frint FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; + def : Pat<(f64 (frint FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; + def : Pat<(f32 (ftrunc FR32:$src)), + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + def : Pat<(f64 (ftrunc FR64:$src)), + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6088,6 +6170,27 @@ let Constraints = "$src1 = $dst" in defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", int_x86_sse41_round_ss, int_x86_sse41_round_sd>; +def : Pat<(ffloor FR32:$src), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; +def : Pat<(f64 (ffloor FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; +def : Pat<(f32 (fnearbyint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; +def : Pat<(f64 (fnearbyint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; +def : Pat<(f32 (fceil FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; +def : Pat<(f64 (fceil FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; +def : Pat<(f32 (frint FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; +def : Pat<(f64 (frint FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; +def : Pat<(f32 (ftrunc FR32:$src)), + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; +def : Pat<(f64 (ftrunc FR64:$src)), + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + //===----------------------------------------------------------------------===// // SSE4.1 - Packed Bit Test 
//===----------------------------------------------------------------------===// @@ -6195,7 +6298,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (memopv8i16 addr:$src))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src))))]>, OpSize; } let Predicates = [HasAVX] in @@ -6221,7 +6324,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } /// SS41I_binop_rm_int - Simple SSE 4.1 binary operator @@ -6237,7 +6340,7 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntId256 VR256:$src1, - (bitconvert (memopv32i8 addr:$src2))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; } let Predicates = [HasAVX] in { @@ -6400,38 +6503,38 @@ let Predicates = [HasAVX] in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv4f32, i128mem, 0>, VEX_4V; defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V; } let ExeDomain = SSEPackedDouble in { defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2f64, i128mem, 0>, VEX_4V; defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V; } defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2i64, i128mem, 0>, VEX_4V; defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2i64, i128mem, 0>, VEX_4V; } let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv4f32, i128mem, 0>, VEX_4V; let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, - VR128, memopv16i8, i128mem, 0>, VEX_4V; + VR128, memopv2f64, i128mem, 0>, VEX_4V; let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, - VR256, memopv32i8, i256mem, 0>, VEX_4V; + VR256, memopv8f32, i256mem, 0>, VEX_4V; } let Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw, - VR256, memopv32i8, i256mem, 0>, VEX_4V; + VR256, memopv4i64, i256mem, 0>, VEX_4V; defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, - VR256, memopv32i8, i256mem, 0>, VEX_4V; + VR256, memopv4i64, i256mem, 0>, VEX_4V; } } @@ -6439,35 +6542,35 @@ let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, - VR128, memopv16i8, i128mem>; + VR128, memopv4f32, i128mem>; let ExeDomain = SSEPackedDouble in defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", 
int_x86_sse41_blendpd, - VR128, memopv16i8, i128mem>; + VR128, memopv2f64, i128mem>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, - VR128, memopv16i8, i128mem>; + VR128, memopv2i64, i128mem>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv16i8, i128mem>; + VR128, memopv2i64, i128mem>; } let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, - VR128, memopv16i8, i128mem>; + VR128, memopv4f32, i128mem>; let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, - VR128, memopv16i8, i128mem>; + VR128, memopv2f64, i128mem>; } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, Intrinsic IntId> { - def rr : I<opc, MRMSrcReg, (outs RC:$dst), + def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; - def rm : I<opc, MRMSrcMem, (outs RC:$dst), + def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), @@ -6480,23 +6583,23 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX] in { let ExeDomain = SSEPackedDouble in { defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, - memopv16i8, int_x86_sse41_blendvpd>; + memopv2f64, int_x86_sse41_blendvpd>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, - memopv32i8, int_x86_avx_blendv_pd_256>; + memopv4f64, int_x86_avx_blendv_pd_256>; } // ExeDomain = SSEPackedDouble let ExeDomain = SSEPackedSingle in { defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, - memopv16i8, int_x86_sse41_blendvps>; + memopv4f32, int_x86_sse41_blendvps>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, - memopv32i8, int_x86_avx_blendv_ps_256>; + memopv8f32, int_x86_avx_blendv_ps_256>; } // ExeDomain = SSEPackedSingle defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, - memopv16i8, int_x86_sse41_pblendvb>; + memopv2i64, int_x86_sse41_pblendvb>; } let Predicates = [HasAVX2] in { defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem, - memopv32i8, int_x86_avx2_pblendvb>; + memopv4i64, int_x86_avx2_pblendvb>; } let Predicates = [HasAVX] in { @@ -6537,7 +6640,8 @@ let Predicates = [HasAVX2] in { /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { - multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { + multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag, + Intrinsic IntId> { def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, @@ -6551,15 +6655,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize; + (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize; } } let ExeDomain = SSEPackedDouble in -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPD : 
SS41I_ternary_int<0x15, "blendvpd", memopv2f64, + int_x86_sse41_blendvpd>; let ExeDomain = SSEPackedSingle in -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, + int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, + int_x86_sse41_pblendvb>; let Predicates = [HasSSE41] in { def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), @@ -6614,8 +6721,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize; } /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator @@ -6630,8 +6736,7 @@ multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr, (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (IntId256 VR256:$src1, - (bitconvert (memopv32i8 addr:$src2))))]>, OpSize; + (IntId256 VR256:$src1, (memopv4i64 addr:$src2)))]>, OpSize; } let Predicates = [HasAVX] in { @@ -6913,7 +7018,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; } // Perform One Round of an AES Encryption/Decryption Flow @@ -7144,7 +7249,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values // -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -7163,35 +7268,10 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; -def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; -def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), - (i32 imm)), - (VINSERTF128rr VR256:$src1, VR128:$src2, - (INSERT_get_vinsertf128_imm VR256:$ins))>; - 
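The ffloor/fnearbyint/fceil/frint/ftrunc patterns added earlier in this hunk hard-code the (V)ROUNDSS/(V)ROUNDSD rounding-control immediate: bits 1:0 select the mode (01 toward -inf, 10 toward +inf, 11 toward zero), bit 2 defers to MXCSR.RC, and bit 3 suppresses the precision exception, which is why fnearbyint uses 0xC while frint uses 0x4. A minimal C++ sketch of the same encodings via the standard SSE4.1 intrinsics (the <smmintrin.h> macro names come from Intel's headers, not from this patch; assumes an SSE4.1-capable host built with -msse4.1):

// round_modes.cpp: the immediates 0x1/0x2/0x3/0x4/0xC from the patterns
// above, spelled with the standard SSE4.1 intrinsic macros.
#include <smmintrin.h>
#include <cstdio>

int main() {
  __m128 x = _mm_set_ss(-2.5f);
  // imm 0x1: round toward -infinity (the ffloor pattern).
  float fl = _mm_cvtss_f32(_mm_round_ss(x, x, _MM_FROUND_FLOOR));
  // imm 0x2: round toward +infinity (the fceil pattern).
  float ce = _mm_cvtss_f32(_mm_round_ss(x, x, _MM_FROUND_CEIL));
  // imm 0x3: round toward zero (the ftrunc pattern).
  float tr = _mm_cvtss_f32(_mm_round_ss(x, x, _MM_FROUND_TRUNC));
  // imm 0x4: use MXCSR rounding mode, may raise inexact (the frint pattern).
  float ri = _mm_cvtss_f32(_mm_round_ss(x, x, _MM_FROUND_RINT));
  // imm 0xC: MXCSR rounding mode, inexact suppressed (fnearbyint pattern).
  float nb = _mm_cvtss_f32(_mm_round_ss(x, x, _MM_FROUND_NEARBYINT));
  std::printf("%f %f %f %f %f\n", fl, ce, tr, ri, nb); // -3 -2 -2 -2 -2
  return 0;
}
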
//===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), (ins VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -7210,31 +7290,6 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4f32 (VEXTRACTF128rr - (v8f32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2f64 (VEXTRACTF128rr - (v4f64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; - //===----------------------------------------------------------------------===// // VMASKMOV - Conditional SIMD Packed Loads and Stores // @@ -7288,7 +7343,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop_i:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V; + [(set RC:$dst, (IntVar RC:$src1, + (bitconvert (i_frag addr:$src2))))]>, VEX_4V; def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), @@ -7302,11 +7358,11 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, let ExeDomain = SSEPackedSingle in { defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - memopv4f32, memopv4i32, + memopv4f32, memopv2i64, int_x86_avx_vpermilvar_ps, int_x86_avx_vpermil_ps>; defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - memopv8f32, memopv8i32, + memopv8f32, memopv4i64, int_x86_avx_vpermilvar_ps_256, int_x86_avx_vpermil_ps_256>; } @@ -7321,19 +7377,28 @@ let ExeDomain = SSEPackedDouble in { int_x86_avx_vpermil_pd_256>; } -def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; -def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; -def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), +def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))), (VPERMILPDYri VR256:$src1, imm:$imm)>; +def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))), + (VPERMILPSYmi addr:$src1, imm:$imm)>; +def : Pat<(v4f64 
(X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))), + (VPERMILPDYmi addr:$src1, imm:$imm)>; +def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)), + (i8 imm:$imm))), + (VPERMILPSYmi addr:$src1, imm:$imm)>; +def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))), + (VPERMILPDYmi addr:$src1, imm:$imm)>; //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in { def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -7359,22 +7424,9 @@ def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, (memopv4f64 addr:$src2), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vperm2f128_si_256 - VR256:$src1, (memopv8i32 addr:$src2), imm:$src3), + VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; - //===----------------------------------------------------------------------===// // VZERO - Zero YMM registers // @@ -7451,9 +7503,9 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr, let isCommutable = 0 in { defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, - VR128, memopv16i8, i128mem>; + VR128, memopv2i64, i128mem>; defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, - VR256, memopv32i8, i256mem>; + VR256, memopv4i64, i256mem>; } //===----------------------------------------------------------------------===// @@ -7541,11 +7593,12 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>, + [(set VR256:$dst, (Int VR256:$src1, + (bitconvert (mem_frag addr:$src2))))]>, VEX_4V; } -defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>; +defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>; let ExeDomain = SSEPackedSingle in defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; @@ -7571,7 +7624,7 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, VEX_W; //===----------------------------------------------------------------------===// -// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks +// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), @@ -7587,6 
+7640,64 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), imm:$src3))]>, VEX_4V; +let Predicates = [HasAVX2] in { +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; + +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), + (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), + (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), + (i8 imm:$imm))), + (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; +} + +// AVX1 patterns +def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; + +def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, + (memopv8f32 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, + (memopv4i64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; + + //===----------------------------------------------------------------------===// // VINSERTI128 - Insert packed integer values // @@ -7603,6 +7714,51 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), imm:$src3))]>, VEX_4V; +let Predicates = [HasAVX2] in { +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 
VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTI128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +} + +// AVX1 patterns +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), + (i32 imm)), + (VINSERTF128rr VR256:$src1, VR128:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; + //===----------------------------------------------------------------------===// // VEXTRACTI128 - Extract packed integer values // @@ -7617,6 +7773,51 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +let Predicates = [HasAVX2] in { +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTI128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTI128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTI128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTI128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +} + +// AVX1 patterns +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTF128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTF128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; 
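The HasAVX2-guarded vinsertf128_insert/vextractf128_extract patterns above select VINSERTI128/VEXTRACTI128 for integer vectors, while the AVX1 patterns fall back to the floating-point-domain VINSERTF128/VEXTRACTF128 forms; keeping integer lane moves in the integer domain is the same domain-crossing concern behind the ExeDomain annotations added elsewhere in this patch. A hedged C++ illustration of the two intrinsic families (assumes <immintrin.h> and an AVX2 target; the variable names are illustrative only):

// lane_ops.cpp: 128-bit lane insert/extract on a 256-bit integer vector.
// With -mavx2 the *i128 intrinsics compile to vinserti128/vextracti128;
// under plain -mavx only the *f128 (floating-point domain) forms exist.
#include <immintrin.h>
#include <cstdio>

int main() {
  __m256i v = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
  __m128i hi = _mm256_extracti128_si256(v, 1);  // AVX2: vextracti128
  __m128i lo = _mm256_extractf128_si256(v, 0);  // AVX1 fallback form
  // Swap the two 128-bit lanes: hi becomes lane 0, lo becomes lane 1.
  __m256i swapped = _mm256_inserti128_si256(
      _mm256_castsi128_si256(hi), lo, 1);       // AVX2: vinserti128
  alignas(32) int out[8];
  _mm256_store_si256((__m256i *)out, swapped);
  for (int i = 0; i < 8; ++i) std::printf("%d ", out[i]); // 4 5 6 7 0 1 2 3
  std::printf("\n");
  return 0;
}
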
+ //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores // diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td new file mode 100644 index 0000000..64cc44d --- /dev/null +++ b/lib/Target/X86/X86InstrXOP.td @@ -0,0 +1,243 @@ +//====- X86InstrXOP.td - Describe the X86 Instruction Set --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes XOP (eXtended OPerations) +// +//===----------------------------------------------------------------------===// + +multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> { + def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX; +} + +let isAsmParserOnly = 1 in { + defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>; + defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>; + defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>; + defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>; + defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>; + defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>; + defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>; + defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>; + defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>; + defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>; + defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>; + defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>; + defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>; + defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>; + defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>; + defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>; + defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>; + defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>; + defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>; +} + +multiclass xop2op256<bits<8> opc, string OpcodeStr> { + def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX, VEX_L; + def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + []>, VEX; +} + +let isAsmParserOnly = 1 in { + defm VFRCZPS : xop2op256<0x80, "vfrczps">; + defm VFRCZPD : xop2op256<0x81, "vfrczpd">; +} + +multiclass xop3op<bits<8> opc, string OpcodeStr> { + def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4VOp3; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4V, VEX_W; + def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX_4VOp3; +} + +let isAsmParserOnly = 1 in { + defm VPSHLW : xop3op<0x95, "vpshlw">; + defm VPSHLQ : xop3op<0x97, "vpshlq">; + defm VPSHLD : xop3op<0x96, "vpshld">; + defm VPSHLB : xop3op<0x94, "vpshlb">; + defm VPSHAW : xop3op<0x99, "vpshaw">; + defm VPSHAQ : xop3op<0x9B, "vpshaq">; + defm VPSHAD : xop3op<0x9A, 
"vpshad">; + defm VPSHAB : xop3op<0x98, "vpshab">; + defm VPROTW : xop3op<0x91, "vprotw">; + defm VPROTQ : xop3op<0x93, "vprotq">; + defm VPROTD : xop3op<0x92, "vprotd">; + defm VPROTB : xop3op<0x90, "vprotb">; +} + +multiclass xop3opimm<bits<8> opc, string OpcodeStr> { + def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX; + def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, VEX; +} + +let isAsmParserOnly = 1 in { + defm VPROTW : xop3opimm<0xC1, "vprotw">; + defm VPROTQ : xop3opimm<0xC3, "vprotq">; + defm VPROTD : xop3opimm<0xC2, "vprotd">; + defm VPROTB : xop3opimm<0xC0, "vprotb">; +} + +// Instruction where second source can be memory, but third must be register +multiclass xop4opm2<bits<8> opc, string OpcodeStr> { + def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; + def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">; + defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">; + defm VPMACSWW : xop4opm2<0x95, "vpmacsww">; + defm VPMACSWD : xop4opm2<0x96, "vpmacswd">; + defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">; + defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">; + defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">; + defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">; + defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">; + defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">; + defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">; + defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">; +} + +// Instruction where second source can be memory, third must be imm8 +multiclass xop4opimm<bits<8> opc, string OpcodeStr> { + def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V; + def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, i8imm:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V; +} + +let isAsmParserOnly = 1 in { + defm VPCOMW : xop4opimm<0xCD, "vpcomw">; + defm VPCOMUW : xop4opimm<0xED, "vpcomuw">; + defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq">; + defm VPCOMUD : xop4opimm<0xEE, "vpcomud">; + defm VPCOMUB : xop4opimm<0xEC, "vpcomub">; + defm VPCOMQ : xop4opimm<0xCF, "vpcomq">; + defm VPCOMD : xop4opimm<0xCE, "vpcomd">; + defm VPCOMB : xop4opimm<0xCC, "vpcomb">; +} + +// Instruction where either second or third source can be memory +multiclass xop4op<bits<8> opc, string OpcodeStr> { + def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; + def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM, XOP_W; + def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, 
f128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPPERM : xop4op<0xA3, "vpperm">; + defm VPCMOV : xop4op<0xA2, "vpcmov">; +} + +multiclass xop4op256<bits<8> opc, string OpcodeStr> { + def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; + def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM, XOP_W; + def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, VEX_4V, VEX_I8IMM; +} + +let isAsmParserOnly = 1 in { + defm VPCMOV : xop4op256<0xA2, "vpcmov">; +} + +multiclass xop5op<bits<8> opc, string OpcodeStr> { + def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; + def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>, XOP_W; + def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; + def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; + def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>, XOP_W; + def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4), + !strconcat(OpcodeStr, + "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), + []>; +} + +let isAsmParserOnly = 1 in { + defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">; + defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">; +} diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 3f88fa6..2145a33 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -424,7 +424,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { TargetJITInfo::LazyResolverFn X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { + TsanIgnoreWritesBegin(); JITCompilerFunction = F; + TsanIgnoreWritesEnd(); #if defined (X86_32_JIT) && !defined (_MSC_VER) if (Subtarget->hasSSE1()) diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 81ee665..9232196 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -368,10 +368,6 @@ ReSimplify: case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; - case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, 
X86::PXORrr); break; - case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; - case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; - case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break; case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c1ac9f3..4e80432 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -452,7 +452,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (RealignStack && + return (MF.getTarget().Options.RealignStack && !MFI->hasVarSizedObjects()); } @@ -583,7 +583,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // sure we restore the stack pointer immediately after the call, there may // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. MachineBasicBlock::iterator B = MBB.begin(); - while (I != B && !llvm::prior(I)->getDesc().isCall()) + while (I != B && !llvm::prior(I)->isCall()) --I; MBB.insert(I, New); } @@ -665,7 +665,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { case MVT::i8: if (High) { switch (Reg) { - default: return 0; + default: return getX86SubSuperRegister(Reg, MVT::i64, High); case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: return X86::AH; case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: @@ -785,6 +785,22 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { return X86::R15D; } case MVT::i64: + // For 64-bit mode if we've requested a "high" register and the + // Q or r constraints we want one of these high registers or + // just the register name otherwise. + if (High) { + switch (Reg) { + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + // Fallthrough. 
+ } + } switch (Reg) { default: return Reg; case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e7bcbf8..6e092c7 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -273,6 +273,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if (IsAMD && ((ECX >> 16) & 0x1)) { HasFMA4 = true; ToggleFeature(X86::FeatureFMA4); + HasXOP = true; + ToggleFeature(X86::FeatureXOP); } } } @@ -317,6 +319,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasCLMUL(false) , HasFMA3(false) , HasFMA4(false) + , HasXOP(false) , HasMOVBE(false) , HasRDRAND(false) , HasF16C(false) @@ -387,9 +390,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - if(EnableSegmentedStacks && !isTargetELF()) - report_fatal_error("Segmented stacks are only implemented on ELF."); - // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both // 32 and 64 bit) and for all 64-bit targets. if (StackAlignOverride) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e93f8e9..ccb9be0 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -93,6 +93,9 @@ protected: /// HasFMA4 - Target has 4-operand fused multiply-add bool HasFMA4; + /// HasXOP - Target has XOP instructions + bool HasXOP; + /// HasMOVBE - True if the processor has the MOVBE instruction. bool HasMOVBE; @@ -198,6 +201,7 @@ public: bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } + bool hasXOP() const { return HasXOP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 1c9f3bd..126042e 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -31,9 +31,10 @@ extern "C" void LLVMInitializeX86Target() { X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false), + : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? 
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-" "n8:16:32-S128" : @@ -52,9 +53,10 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true), + : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), @@ -67,11 +69,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, /// X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), - Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { // Determine the PICStyle based on the target selected. @@ -95,8 +98,11 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, } // default to hard float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Hard; + if (Options.FloatABIType == FloatABI::Default) + this->Options.FloatABIType = FloatABI::Hard; + + if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF()) + report_fatal_error("Segmented stacks are only implemented on ELF."); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 64be458..3ac1769 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -38,7 +38,7 @@ class X86TargetMachine : public LLVMTargetMachine { public: X86TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); @@ -85,7 +85,7 @@ class X86_32TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -113,7 +113,7 @@ class X86_64TargetMachine : public X86TargetMachine { X86JITInfo JITInfo; public: X86_64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); virtual const TargetData *getTargetData() const { return &DataLayout; } diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 9bb54a8..f8c30eb 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -220,7 +220,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { MachineInstr *MI = I; DebugLoc dl = I->getDebugLoc(); - bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn(); + bool isControlFlow = MI->isCall() || MI->isReturn(); // Shortcut: don't need to 
check regular instructions in dirty state. if (!isControlFlow && CurState == ST_DIRTY) diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index d91da8c..de4abfc 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -21,17 +21,5 @@ add_llvm_target(XCoreCodeGen XCoreSelectionDAGInfo.cpp ) -add_llvm_library_dependencies(LLVMXCoreCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - LLVMXCoreDesc - LLVMXCoreInfo - ) - add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt index 1f7e2d5..53b4a9e 100644 --- a/lib/Target/XCore/LLVMBuild.txt +++ b/lib/Target/XCore/LLVMBuild.txt @@ -15,6 +15,9 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = MCTargetDesc TargetInfo + [component_0] type = TargetGroup name = XCore @@ -27,4 +30,3 @@ name = XCoreCodeGen parent = XCore required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo add_to_library_groups = XCore - diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt index 269822d..3a3f5b4 100644 --- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt @@ -3,11 +3,6 @@ add_llvm_library(LLVMXCoreDesc XCoreMCAsmInfo.cpp ) -add_llvm_library_dependencies(LLVMXCoreDesc - LLVMMC - LLVMXCoreInfo - ) - add_dependencies(LLVMXCoreDesc XCoreCommonTableGen) # Hack: we need to include 'main' target directory to grab private headers diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt index 628afb5..a80c939 100644 --- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt @@ -21,4 +21,3 @@ name = XCoreDesc parent = XCore required_libraries = MC XCoreInfo add_to_library_groups = XCore - diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt index 7f84f69..2c34b87 100644 --- a/lib/Target/XCore/TargetInfo/CMakeLists.txt +++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt @@ -4,10 +4,4 @@ add_llvm_library(LLVMXCoreInfo XCoreTargetInfo.cpp ) -add_llvm_library_dependencies(LLVMXCoreInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - add_dependencies(LLVMXCoreInfo XCoreCommonTableGen) diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt index d0b8e54..770ba87 100644 --- a/lib/Target/XCore/TargetInfo/LLVMBuild.txt +++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt @@ -21,4 +21,3 @@ name = XCoreInfo parent = XCore required_libraries = MC Support Target add_to_library_groups = XCore - diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h index b8fb0ca..08f091e 100644 --- a/lib/Target/XCore/XCore.h +++ b/lib/Target/XCore/XCore.h @@ -24,7 +24,8 @@ namespace llvm { class XCoreTargetMachine; class formatted_raw_ostream; - FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM); + FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM, + CodeGenOpt::Level OptLevel); } // end namespace llvm; diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 7f8b169..5007d04 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -84,7 +84,8 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti) } bool 
XCoreFrameLowering::hasFP(const MachineFunction &MF) const { - return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MF.getFrameInfo()->hasVarSizedObjects(); } void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 8d746ae..7564fba 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -41,8 +41,8 @@ namespace { const XCoreSubtarget &Subtarget; public: - XCoreDAGToDAGISel(XCoreTargetMachine &TM) - : SelectionDAGISel(TM), + XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel), Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } @@ -83,8 +83,9 @@ namespace { /// createXCoreISelDag - This pass converts a legalized DAG into a /// XCore-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) { - return new XCoreDAGToDAGISel(TM); +FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new XCoreDAGToDAGISel(TM, OptLevel); } bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index d791daa..c5c668e 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -109,6 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::TRAP, MVT::Other, Legal); diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index eec3674..7e1e035 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -21,9 +21,10 @@ using namespace llvm; /// XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), @@ -34,7 +35,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, } bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) { - PM.add(createXCoreISelDag(*this)); + PM.add(createXCoreISelDag(*this, getOptLevel())); return false; } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 3f2644d..0159b1e 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine { XCoreSelectionDAGInfo TSInfo; public: XCoreTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 8fa66fc..58b3551 100644 --- 
a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -20,14 +20,3 @@ add_llvm_library(LLVMipo StripDeadPrototypes.cpp StripSymbols.cpp ) - -add_llvm_library_dependencies(LLVMipo - LLVMAnalysis - LLVMCore - LLVMInstCombine - LLVMScalarOpts - LLVMSupport - LLVMTarget - LLVMTransformUtils - LLVMipa - ) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c57e9fc..2e869e6 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -61,6 +62,7 @@ namespace { struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfo>(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(ID) { @@ -84,7 +86,10 @@ namespace { } char GlobalOpt::ID = 0; -INITIALIZE_PASS(GlobalOpt, "globalopt", +INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -345,6 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { // and will invalidate our notion of what Init is. Constant *SubInit = 0; if (!isa<ConstantExpr>(GEP->getOperand(0))) { + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) @@ -828,6 +834,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { static void ConstantPropUsersOf(Value *V) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. if (Constant *NewC = ConstantFoldInstruction(I)) { I->replaceAllUsesWith(NewC); @@ -1931,7 +1938,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { TargetData *TD = getAnalysisIfAvailable<TargetData>(); - Constant *New = ConstantFoldConstantExpression(CE, TD); + TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + Constant *New = ConstantFoldConstantExpression(CE, TD, TLI); if (New && New != CE) GV->setInitializer(New); } @@ -2304,7 +2312,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, DenseMap<Constant*, Constant*> &MutatedMemory, std::vector<GlobalVariable*> &AllocaTmps, SmallPtrSet<Constant*, 8> &SimpleConstants, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) @@ -2461,7 +2470,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. 
- if (Constant *C = ConstantFoldCall(Callee, Formals)) { + if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { InstResult = C; } else { return false; @@ -2473,7 +2482,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, Constant *RetVal; // Execute the call, if successful, use the return value. if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, - MutatedMemory, AllocaTmps, SimpleConstants, TD)) + MutatedMemory, AllocaTmps, SimpleConstants, TD, + TLI)) return false; InstResult = RetVal; } @@ -2535,7 +2545,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (!CurInst->use_empty()) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) - InstResult = ConstantFoldConstantExpression(CE, TD); + InstResult = ConstantFoldConstantExpression(CE, TD, TLI); Values[CurInst] = InstResult; } @@ -2547,7 +2557,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { +static bool EvaluateStaticConstructor(Function *F, const TargetData *TD, + const TargetLibraryInfo *TLI) { /// MutatedMemory - For each store we execute, we update this map. Loads /// check this to get the most up-to-date value. If evaluation is successful, /// this state is committed to the process. @@ -2572,7 +2583,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { bool EvalSuccess = EvaluateFunction(F, RetValDummy, SmallVector<Constant*, 0>(), CallStack, MutatedMemory, AllocaTmps, - SimpleConstants, TD); + SimpleConstants, TD, TLI); if (EvalSuccess) { // We succeeded at evaluation: commit the result. @@ -2601,8 +2612,6 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { return EvalSuccess; } - - /// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. /// Return true if anything changed. bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { @@ -2611,6 +2620,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (Ctors.empty()) return false; const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + // Loop over global ctors, optimizing them when we can. for (unsigned i = 0; i != Ctors.size(); ++i) { Function *F = Ctors[i]; @@ -2628,7 +2639,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (F->empty()) continue; // If we can evaluate the ctor at compile time, do. 
- if (EvaluateStaticConstructor(F, TD)) { + if (EvaluateStaticConstructor(F, TD, TLI)) { Ctors.erase(Ctors.begin()+i); MadeChange = true; --i; diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 884faca..b358fab 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -21,4 +21,3 @@ name = IPO parent = Transforms library_name = ipo required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils - diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 8fdfd72..f63f532 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -21,7 +21,6 @@ #include "llvm/DefaultPasses.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/IPO.h" @@ -101,6 +100,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(Inliner); Inliner = 0; } + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); return; } @@ -340,4 +340,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, PassManagerBase *LPM = unwrap(PM); Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); } - diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index a46d5ad..d070ccc 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) - -add_llvm_library_dependencies(LLVMInstCombine - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 3808278..464e9d0 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -22,6 +22,7 @@ namespace llvm { class CallSite; class TargetData; + class TargetLibraryInfo; class DbgDeclareInst; class MemIntrinsic; class MemSetInst; @@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; + TargetLibraryInfo *TLI; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -92,9 +94,11 @@ public: bool DoOneIteration(Function &F, unsigned ItNum); virtual void getAnalysisUsage(AnalysisUsage &AU) const; - + TargetData *getTargetData() const { return TD; } + TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } + // Visitation implementation - Implement instruction combining for different // instruction types. The semantics are as follows: // Return Value: diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e8136ab..27c7c54 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Get the current byte offset into the thing. Use the original // operand in case we're looking through a bitcast. 
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + if (!GEP->getPointerOperandType()->isPointerTy()) + return 0; Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); Op1 = GEP->getPointerOperand()->stripPointerCasts(); @@ -960,7 +962,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 2 : 0); + int ix = FTy->getNumParams(); // See if we can optimize any arguments passed through the varargs area of // the call. for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index f10e48a..46e4acd 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } - - /// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. @@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // If we got a constantexpr back, try to simplify it with TD info. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(CE, TD); + C = ConstantFoldConstantExpression(CE, TD, TLI); return C; } @@ -528,9 +527,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, return ReplaceInstUsesWith(CI, In); } - - - + // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. // zext (X == 1) to i32 --> X iff X has only the low bit set. @@ -1213,10 +1210,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) - // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it. CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); - if (Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "sqrt" && + if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && + Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) && Call->getNumArgOperands() == 1 && Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bb1cbfa..144b92b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -284,7 +284,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, - CompareRHS, TD); + CompareRHS, TD, TLI); // If the result is undef for this element, ignore it. 
if (isa<UndefValue>(C)) { // Extend range state machines to cover this element in case there is an @@ -1657,6 +1657,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) return 0; + // This is only really a signed overflow check if the inputs have been + // sign-extended; check for that condition. For example, if CI2 is 2^31 and + // the operands of the add are 64 bits wide, we need at least 33 sign bits. + unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; + if (IC.ComputeNumSignBits(A) < NeededSignBits || + IC.ComputeNumSignBits(B) < NeededSignBits) + return 0; + // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant // and truncates that discard the high bits of the add. Verify that this is @@ -1787,6 +1795,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // comparing -val or val with non-zero is the same as just comparing val + // ie, abs(val) != 0 -> val != 0 + if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) + { + Value *Cond, *SelectTrue, *SelectFalse; + if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), + m_Value(SelectFalse)))) { + if (Value *V = dyn_castNegVal(SelectTrue)) { + if (V == SelectFalse) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + else if (Value *V = dyn_castNegVal(SelectFalse)) { + if (V == SelectTrue) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + } + } + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 91e60a4..f1ea8ea 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -282,7 +282,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is /// replaced with RepOp. static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Trivial replacement. if (V == Op) return RepOp; @@ -294,17 +295,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // If this is a binary operator, try to simplify it with the replaced op. if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { if (B->getOperand(0) == Op) - return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD); + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI); if (B->getOperand(1) == Op) - return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD); + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI); } // Same for CmpInsts. if (CmpInst *C = dyn_cast<CmpInst>(I)) { if (C->getOperand(0) == Op) - return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD); + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD, + TLI); if (C->getOperand(1) == Op) - return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD); + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD, + TLI); } // TODO: We could hand off more cases to instsimplify here. 
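The ComputeNumSignBits guard above makes the comment's example concrete: replacing a W-bit add by an N-bit llvm.sadd.with.overflow is only sound if both inputs are sign-extended N-bit values, i.e. carry at least W - N + 1 sign bits (33 for the 64-to-32 case in the comment). A standalone check of that arithmetic:

#include <cassert>
#include <cstdint>

// Counts how many leading bits of v equal the sign bit, including the
// sign bit itself: the same quantity ComputeNumSignBits reports.
static unsigned numSignBits(int64_t v) {
  bool sign = v < 0;
  unsigned n = 1;  // the sign bit
  for (int bit = 62;
       bit >= 0 && ((uint64_t(v) >> bit) & 1) == uint64_t(sign); --bit)
    ++n;
  return n;
}

int main() {
  // A sign-extended 32-bit value has at least 64 - 32 + 1 = 33 sign bits...
  assert(numSignBits(int64_t(INT32_MIN)) == 33);
  assert(numSignBits(int64_t(INT32_MAX)) == 33);
  // ...but a genuinely 33-bit quantity does not, so a narrowed 32-bit
  // sadd.with.overflow would misreport overflow for it.
  assert(numSignBits(int64_t(INT32_MAX) + 1) == 32);
  return 0;
}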
@@ -330,7 +333,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - ConstOps, TD); + ConstOps, TD, TLI); } } @@ -479,18 +482,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) return ReplaceInstUsesWith(SI, FalseVal); } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) return ReplaceInstUsesWith(SI, TrueVal); } @@ -679,6 +682,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return BinaryOperator::CreateOr(CondVal, FalseVal); else if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); + + // select a, ~a, b -> (~a)&b + // select a, b, ~a -> (~a)|b + if (match(TrueVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateAnd(TrueVal, FalseVal); + else if (match(FalseVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateOr(TrueVal, FalseVal); } // Selecting between two integer constants? diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 6d85add..702e0f2 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, V = IC.Builder->CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. 
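For the two select folds just added in visitSelectInst, note that matching m_Not(m_Specific(CondVal)) forces the not-of-condition operand (and hence the whole select) to i1, so a four-row truth table is the entire correctness argument. A standalone check:

#include <cassert>

int main() {
  // Exhaustive i1 verification of:
  //   select a, ~a, b  ==  ~a & b
  //   select a, b, ~a  ==  ~a | b
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b) {
      int na = a ^ 1;                    // ~a on i1
      assert((a ? na : b) == (na & b));  // select a, ~a, b
      assert((a ? b : na) == (na | b));  // select a, b, ~a
    }
  return 0;
}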
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - V = ConstantFoldConstantExpression(CE, IC.getTargetData()); + V = ConstantFoldConstantExpression(CE, IC.getTargetData(), + IC.getTargetLibraryInfo()); return V; } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index a7a6311..af065cd 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -41,6 +41,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { } char InstCombiner::ID = 0; -INITIALIZE_PASS(InstCombiner, "instcombine", +INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine", + "Combine redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstCombiner, "instcombine", "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } @@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { MadeChange = true; } - if ((*I)->getType() != IntPtrTy) { + Type *IndexTy = (*I)->getType(); + if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. @@ -909,7 +915,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType()); + // We do not handle pointer-vector geps here + if (!StrippedPtr) + return 0; + if (StrippedPtr != PtrOp && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { @@ -1798,7 +1808,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { static bool AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet<BasicBlock*, 64> &Visited, InstCombiner &IC, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; Worklist.push_back(BB); @@ -1825,7 +1836,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); @@ -1843,7 +1854,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Constant*& FoldRes = FoldedConstants[CE]; if (!FoldRes) - FoldRes = ConstantFoldConstantExpression(CE, TD); + FoldRes = ConstantFoldConstantExpression(CE, TD, TLI); if (!FoldRes) FoldRes = CE; @@ -1909,7 +1920,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // the reachable instructions. Ignore blocks that are not reachable. Keep // track of which blocks we visit. 
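The INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END rewrite above recurs throughout this patch: a pass that calls getAnalysis<TargetLibraryInfo>() must declare the requirement in getAnalysisUsage (or the PassManager asserts at runtime), and its initializer should name the dependency so TargetLibraryInfo is registered before the pass itself. The skeleton, abstracted from the instcombine hunk; MyPass is a stand-in name and this is not compilable on its own:

// Registration: the DEPENDENCY line pairs with addRequired<> below.
INITIALIZE_PASS_BEGIN(MyPass, "mypass", "My pass description", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(MyPass, "mypass", "My pass description", false, false)

void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfo>();
}

bool MyPass::runOnFunction(Function &F) {
  TLI = &getAnalysis<TargetLibraryInfo>();  // safe once both are declared
  // ...
  return false;
}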
SmallPtrSet<BasicBlock*, 64> Visited; - MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); + MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD, + TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents @@ -1954,7 +1966,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa<Constant>(I->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(I, TD)) { + if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. @@ -2062,7 +2074,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); - + TLI = &getAnalysis<TargetLibraryInfo>(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt index b73c303..62c6161 100644 --- a/lib/Transforms/InstCombine/LLVMBuild.txt +++ b/lib/Transforms/InstCombine/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = InstCombine parent = Transforms required_libraries = Analysis Core Support Target TransformUtils - diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b617539..4cc5727 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -55,8 +55,11 @@ static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *kAsanModuleCtorName = "asan.module_ctor"; +static const char *kAsanModuleDtorName = "asan.module_dtor"; +static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; +static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanInitName = "__asan_init"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -434,6 +437,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, IRBuilder<> IRB1(CheckTerm); Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize); Crash->setDebugLoc(OrigIns->getDebugLoc()); + ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C)); } // This function replaces all global variables with new variables that have @@ -517,7 +521,11 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); + SmallString<2048> DescriptionOfGlobal = G->getName(); + DescriptionOfGlobal += " ("; + DescriptionOfGlobal += M.getModuleIdentifier(); + DescriptionOfGlobal += ")"; + GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); // Create a new global variable with enough space for a redzone. 
GlobalVariable *NewGlobal = new GlobalVariable( @@ -558,6 +566,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, n)); + // We also need to unregister globals at the end, e.g. when a shared library + // gets closed. + Function *AsanDtorFunction = Function::Create( + FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); + BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); + IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB)); + Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction( + kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); + + IRB_Dtor.CreateCall2(AsanUnregisterGlobals, + IRB.CreatePointerCast(AllGlobals, IntptrTy), + ConstantInt::get(IntptrTy, n)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority); + DEBUG(dbgs() << M); return true; } @@ -631,7 +655,7 @@ bool AddressSanitizer::runOnModule(Module &M) { Res |= handleFunction(M, *F); } - appendToGlobalCtors(M, AsanCtorFunction, 1 /*high priority*/); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); return Res; } @@ -951,8 +975,8 @@ BlackList::BlackList(const std::string &Path) { OwningPtr<MemoryBuffer> File; if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) { - errs() << EC.message(); - exit(1); + report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " + + EC.message()); } MemoryBuffer *Buff = File.take(); const char *Data = Buff->getBufferStart(); @@ -962,15 +986,23 @@ BlackList::BlackList(const std::string &Path) { for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { if (Lines[i].startswith(kFunPrefix)) { std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); - if (Fun.size()) { - Fun += "|"; - } + std::string ThisFuncRE; // add ThisFunc replacing * with .* for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { if (ThisFunc[j] == '*') - Fun += '.'; - Fun += ThisFunc[j]; + ThisFuncRE += '.'; + ThisFuncRE += ThisFunc[j]; } + // Check that the regexp is valid. + Regex CheckRE(ThisFuncRE); + std::string Error; + if (!CheckRE.isValid(Error)) + report_fatal_error("malformed blacklist regex: " + ThisFunc + + ": " + Error); + // Append to the final regexp. 
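The blacklist rework above buys two things: each "fun:" glob is validated as a regex by itself, so a malformed entry is reported with its own text instead of poisoning the combined pattern, and only validated fragments reach the final alternation. The translation itself is deliberately minimal: only '*' is rewritten, and other regex metacharacters pass through unescaped, faithful to the loop above. A standalone rendering:

#include <cassert>
#include <string>

// '*' becomes ".*"; every other character is passed through untouched.
static std::string globToRegex(const std::string &Glob) {
  std::string RE;
  for (std::string::size_type j = 0; j != Glob.size(); ++j) {
    if (Glob[j] == '*')
      RE += '.';
    RE += Glob[j];
  }
  return RE;
}

int main() {
  assert(globToRegex("foo*bar") == "foo.*bar");
  assert(globToRegex("*_ZN3Foo*") == ".*_ZN3Foo.*");
  return 0;
}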
+ if (Fun.size()) + Fun += "|"; + Fun += ThisFuncRE; } } if (Fun.size()) { diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 929b7cd..a4a1fef 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -7,10 +7,3 @@ add_llvm_library(LLVMInstrumentation PathProfiling.cpp ProfilingUtils.cpp ) - -add_llvm_library_dependencies(LLVMInstrumentation - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTransformUtils - ) diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index ccf7e11..96e5d5b 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -43,12 +43,14 @@ namespace { public: static char ID; GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) { + : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false), + UseExtraChecksum(false) { initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false) + GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false, + bool useExtraChecksum = false) : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - Use402Format(use402Format) { + Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) { assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } @@ -94,6 +96,7 @@ namespace { bool EmitNotes; bool EmitData; bool Use402Format; + bool UseExtraChecksum; Module *M; LLVMContext *Ctx; @@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - bool Use402Format) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format); + bool Use402Format, + bool UseExtraChecksum) { + return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum); } namespace { @@ -167,7 +171,7 @@ namespace { } uint32_t length() { - // Here 2 = 1 for string lenght + 1 for '0' id#. + // Here 2 = 1 for string length + 1 for '0' id#. return lengthOfGCOVString(Filename) + 2 + Lines.size(); } @@ -244,10 +248,12 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) { + GCOVFunction(DISubprogram SP, raw_ostream *os, + bool Use402Format, bool UseExtraChecksum) { this->os = os; Function *F = SP.getFunction(); + DEBUG(dbgs() << "Function: " << F->getName() << "\n"); uint32_t i = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { Blocks[BB] = new GCOVBlock(i++, os); @@ -257,14 +263,14 @@ namespace { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (!Use402Format) - ++BlockLen; // For second checksum. 
+ if (UseExtraChecksum) + ++BlockLen; write(BlockLen); uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); write(Ident); - write(0); // checksum #1 - if (!Use402Format) - write(0); // checksum #2 + write(0); // lineno checksum + if (UseExtraChecksum) + write(0); // cfg checksum writeGCOVString(SP.getName()); writeGCOVString(SP.getFilename()); write(SP.getLineNumber()); @@ -290,6 +296,7 @@ namespace { for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { write(0); // No flags on our blocks. } + DEBUG(dbgs() << Blocks.size() << " blocks.\n"); // Emit edges between blocks. for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), @@ -301,6 +308,8 @@ namespace { write(Block.OutEdges.size() * 2 + 1); write(Block.Number); for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { + DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number + << "\n"); write(Block.OutEdges[i]->Number); write(0); // no flags } @@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) { } void GCOVProfiler::emitGCNO() { - DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (CU_Nodes) { - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - // Each compile unit gets its own .gcno file. This means that whether we run - // this pass over the original .o's as they're produced, or run it after - // LTO, we'll generate the same .gcno files. - - DICompileUnit CU(CU_Nodes->getOperand(i)); - raw_fd_ostream *&out = GcnoFiles[CU]; - std::string ErrorInfo; - out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, - raw_fd_ostream::F_Binary); - if (!Use402Format) - out->write("oncg*404MVLL", 12); - else - out->write("oncg*204MVLL", 12); - - DIArray SPs = CU.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (!SP.Verify()) continue; - raw_fd_ostream *&os = GcnoFiles[CU]; - - Function *F = SP.getFunction(); - if (!F) continue; - GCOVFunction Func(SP, os, Use402Format); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); - TerminatorInst *TI = BB->getTerminator(); - if (int successors = TI->getNumSuccessors()) { - for (int i = 0; i != successors; ++i) { - Block.addEdge(Func.getBlock(TI->getSuccessor(i))); - } - } else if (isa<ReturnInst>(TI)) { - Block.addEdge(Func.getReturnBlock()); - } - - uint32_t Line = 0; - for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { - const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; - if (Line == Loc.getLine()) continue; - Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; - - GCOVLines &Lines = Block.getFile(SP.getFilename()); - Lines.addLine(Loc.getLine()); + if (!CU_Nodes) return; + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. 
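The emitGCNO rewrite in progress above is mostly an ownership simplification: the removed code heap-allocated one raw_fd_ostream per compile unit into a DenseMap and then needed a second loop at the end to write the EOF marker, close, and delete each stream; the new code keeps a stack-owned stream alive for exactly one loop iteration. A standalone analogue of the RAII shape, in plain C++:

#include <cstdio>

// Stack-owned file handle: the destructor runs on every path out of the
// loop body, so no trailing cleanup pass (and no leak on an early
// 'continue') is possible.
struct OwnedFile {
  std::FILE *F;
  explicit OwnedFile(const char *Path) : F(std::fopen(Path, "wb")) {}
  ~OwnedFile() { if (F) std::fclose(F); }
};

int main() {
  for (int CU = 0; CU != 3; ++CU) {
    char Name[32];
    std::snprintf(Name, sizeof(Name), "cu%d.gcno", CU);
    OwnedFile Out(Name);
    if (!Out.F) continue;        // early exit; destructor still runs
    std::fputs("oncg", Out.F);   // magic bytes, as in the diff
  }
  return 0;
}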
+ + DICompileUnit CU(CU_Nodes->getOperand(i)); + std::string ErrorInfo; + raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, + raw_fd_ostream::F_Binary); + if (!Use402Format) + out.write("oncg*404MVLL", 12); + else + out.write("oncg*204MVLL", 12); + + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); + I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); } - Func.writeOut(); } + Func.writeOut(); } - } - - for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator - I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) { - raw_fd_ostream *&out = I->second; - out->write("\0\0\0\0\0\0\0\0", 8); // EOF - out->close(); - delete out; + out.write("\0\0\0\0\0\0\0\0", 8); // EOF + out.close(); } } diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt index f302d03..d36ad54 100644 --- a/lib/Transforms/Instrumentation/LLVMBuild.txt +++ b/lib/Transforms/Instrumentation/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = Instrumentation parent = Transforms required_libraries = Analysis Core Support TransformUtils - diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt index d36b898..b2ef49a 100644 --- a/lib/Transforms/LLVMBuild.txt +++ b/lib/Transforms/LLVMBuild.txt @@ -15,8 +15,10 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = IPO InstCombine Instrumentation Scalar Utils + [component_0] type = Group name = Transforms parent = Libraries - diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index a6f0cf3..d660c72 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -32,12 +32,3 @@ add_llvm_library(LLVMScalarOpts Sink.cpp TailRecursionElimination.cpp ) - -add_llvm_library_dependencies(LLVMScalarOpts - LLVMAnalysis - LLVMCore - LLVMInstCombine - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index f8f18b2..f9abfe9 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -69,6 +70,7 @@ namespace { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// 
transformation profitability. const TargetLowering *TLI; + const TargetLibraryInfo *TLInfo; DominatorTree *DT; ProfileInfo *PFI; @@ -97,6 +99,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); + AU.addRequired<TargetLibraryInfo>(); } private: @@ -116,7 +119,10 @@ namespace { } char CodeGenPrepare::ID = 0; -INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", +INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare", "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { @@ -127,6 +133,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; ModifiedDT = false; + TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); @@ -542,7 +549,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { WeakVH IterHandle(CurInstIterator); ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, - ModifiedDT ? 0 : DT); + TLInfo, ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 664c3f6..5430f62 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -24,6 +24,8 @@ #include "llvm/Constant.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" #include <set> @@ -42,19 +44,22 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } }; } char ConstantPropagation::ID = 0; -INITIALIZE_PASS(ConstantPropagation, "constprop", +INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop", + "Simple constant propagation", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(ConstantPropagation, "constprop", "Simple constant propagation", false, false) FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); } - bool ConstantPropagation::runOnFunction(Function &F) { // Initialize the worklist to all of the instructions ready to process... std::set<Instruction*> WorkList; @@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.insert(&*i); } bool Changed = false; + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); while (!WorkList.empty()) { Instruction *I = *WorkList.begin(); WorkList.erase(WorkList.begin()); // Get an element from the worklist... if (!I->use_empty()) // Don't muck with dead instructions... - if (Constant *C = ConstantFoldInstruction(I)) { + if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { // Add all of the users of this instruction to the worklist, they might // be constant propagatable now... 
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f5688cb..8729019 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -416,7 +416,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // writes to addresses which will definitely be overwritten later if (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) && - LaterOff + Later.Size >= EarlierOff + Earlier.Size) + int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)) return OverwriteEnd; // Otherwise, they don't completely overlap. @@ -624,6 +624,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, BasicBlock *BB, DominatorTree *DT) { for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { BasicBlock *Pred = *I; + if (Pred == BB) continue; TerminatorInst *PredTI = Pred->getTerminator(); if (PredTI->getNumSuccessors() != 1) continue; @@ -853,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, I != E; ++I) DeadStackObjects.erase(*I); } - diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index c0223d2..5241e11 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" @@ -215,6 +216,7 @@ namespace { class EarlyCSE : public FunctionPass { public: const TargetData *TD; + const TargetLibraryInfo *TLI; DominatorTree *DT; typedef RecyclingAllocator<BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; @@ -263,6 +265,7 @@ private: // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); AU.setPreservesCFG(); } }; @@ -277,6 +280,7 @@ FunctionPass *llvm::createEarlyCSEPass() { INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) bool EarlyCSE::processNode(DomTreeNode *Node) { @@ -328,7 +332,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, TD, DT)) { + if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); Inst->replaceAllUsesWith(V); Inst->eraseFromParent(); @@ -455,6 +459,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { bool EarlyCSE::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTree>(); // Tables that the pass uses when walking the domtree. 
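Backing up to the DeadStoreElimination change at the top of this hunk: the sizes are uint64_t while the offsets are int64_t, so under C++'s usual arithmetic conversions each side of the old comparison was computed and compared as unsigned, and a negative intermediate wrapped to a huge value. The added int64_t casts force the comparison back into signed arithmetic. A standalone reproduction of the conversion rule:

#include <cassert>
#include <cstdint>

int main() {
  int64_t Off = -8;    // a (signed) byte offset
  uint64_t Size = 4;   // an (unsigned) access size

  // int64_t + uint64_t is computed in uint64_t, so a negative offset
  // wraps instead of staying negative.
  assert(Off + Size == 0xFFFFFFFFFFFFFFFCull);  // not -4!
  // Casting the sum, as the patch does, recovers the intended value.
  assert(int64_t(Off + Size) == -4);
  return 0;
}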
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a51cbb6..374fdd7 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" @@ -446,7 +447,8 @@ namespace { MemoryDependenceAnalysis *MD; DominatorTree *DT; const TargetData *TD; - + const TargetLibraryInfo *TLI; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -530,6 +532,7 @@ namespace { // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); if (!NoLoads) AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); @@ -568,6 +571,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) @@ -2032,7 +2036,7 @@ bool GVN::processInstruction(Instruction *I) { // to value numbering it. Value numbering often exposes redundancies, for // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. - if (Value *V = SimplifyInstruction(I, TD, DT)) { + if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); @@ -2134,6 +2138,7 @@ bool GVN::runOnFunction(Function& F) { MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); VN.setDomTree(DT); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 0772b48..ad8689a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -182,7 +182,7 @@ bool GlobalMerge::doInitialization(Module &M) { continue; // Ignore fancy-aligned globals for now. - unsigned Alignment = I->getAlignment(); + unsigned Alignment = TD->getPreferredAlignment(I); Type *Ty = I->getType()->getElementType(); if (Alignment > TD->getABITypeAlignment(Ty)) continue; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 1f21108..6d52b22 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -58,18 +58,16 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced"); STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); -namespace llvm { - cl::opt<bool> EnableIVRewrite( - "enable-iv-rewrite", cl::Hidden, - cl::desc("Enable canonical induction variable rewriting")); - - // Trip count verification can be enabled by default under NDEBUG if we - // implement a strong expression equivalence checker in SCEV. 
Until then, we - // use the verify-indvars flag, which may assert in some cases. - cl::opt<bool> VerifyIndvars( - "verify-indvars", cl::Hidden, - cl::desc("Verify the ScalarEvolution result after running indvars")); -} +static cl::opt<bool> EnableIVRewrite( + "enable-iv-rewrite", cl::Hidden, + cl::desc("Enable canonical induction variable rewriting")); + +// Trip count verification can be enabled by default under NDEBUG if we +// implement a strong expression equivalence checker in SCEV. Until then, we +// use the verify-indvars flag, which may assert in some cases. +static cl::opt<bool> VerifyIndvars( + "verify-indvars", cl::Hidden, + cl::desc("Verify the ScalarEvolution result after running indvars")); namespace { class IndVarSimplify : public LoopPass { @@ -180,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { // base of a recurrence. This handles the case in which SCEV expansion // converts a pointer type recurrence into a nonrecurrent pointer base // indexed by an integer recurrence. + + // If the GEP base pointer is a vector of pointers, abort. + if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) + return false; + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); if (FromBase == ToBase) @@ -946,9 +949,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { else return 0; + // When creating this AddExpr, don't apply the current operations NSW or NUW + // flags. This instruction may be guarded by control flow that the no-wrap + // behavior depends on. Non-control-equivalent instructions can be mapped to + // the same SCEV expression, and it would be incorrect to transfer NSW/NUW + // semantics to those operations. const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>( - SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr, - IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW)); + SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr)); if (!AddRec || AddRec->getLoop() != L) return 0; @@ -1231,7 +1238,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, /// BackedgeTakenInfo. If these expressions have not been reduced, then /// expanding them may incur additional cost (albeit in the loop preheader). static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, + SmallPtrSet<const SCEV*, 8> &Processed, ScalarEvolution *SE) { + if (!Processed.insert(S)) + return false; + // If the backedge-taken count is a UDiv, it's very likely a UDiv that // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a // precise expression, rather than a UDiv from the user's code. 
If we can't @@ -1259,7 +1270,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) { - if (isHighCostExpansion(*I, BI, SE)) + if (isHighCostExpansion(*I, BI, Processed, SE)) return true; } return false; @@ -1302,7 +1313,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { if (!BI) return false; - if (isHighCostExpansion(BackedgeTakenCount, BI, SE)) + SmallPtrSet<const SCEV*, 8> Processed; + if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) return false; return true; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index f410af3..c78db3f 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" @@ -75,6 +76,7 @@ namespace { /// class JumpThreading : public FunctionPass { TargetData *TD; + TargetLibraryInfo *TLI; LazyValueInfo *LVI; #ifdef NDEBUG SmallPtrSet<BasicBlock*, 16> LoopHeaders; @@ -107,6 +109,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LazyValueInfo>(); AU.addPreserved<LazyValueInfo>(); + AU.addRequired<TargetLibraryInfo>(); } void FindLoopHeaders(Function &F); @@ -133,6 +136,7 @@ char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) INITIALIZE_PASS_DEPENDENCY(LazyValueInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) @@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); LVI = &getAnalysis<LazyValueInfo>(); FindLoopHeaders(F); @@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Run constant folding to see if we can reduce the condition to a simple // constant. if (Instruction *I = dyn_cast<Instruction>(Condition)) { - Value *SimpleVal = ConstantFoldInstruction(I, TD); + Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI); if (SimpleVal) { I->replaceAllUsesWith(SimpleVal); I->eraseFromParent(); @@ -921,8 +926,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Split them out to their own block. UnavailablePred = - SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), - "thread-pre-split", this); + SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this); } // If the value isn't available in all predecessors, then there will be @@ -1334,8 +1338,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), - ".thr_comm", this); + PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this); } // And finally, do it! 
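The Processed set now threaded through isHighCostExpansion turns a tree recursion into a DAG walk. SCEV nodes are uniqued, so a subexpression shared along k paths was previously analysed once per path, which is exponential in the worst case; with the `if (!Processed.insert(S)) return false;` guard each node is analysed once. A standalone model:

#include <cstdio>
#include <set>

struct Expr { const Expr *L, *R; };  // stand-in for a uniqued SCEV node

static int Visits;

// First visit inserts and analyses; every later visit bails out.
static void walk(const Expr *E, std::set<const Expr *> &Processed) {
  if (!E || !Processed.insert(E).second)
    return;
  ++Visits;
  walk(E->L, Processed);
  walk(E->R, Processed);
}

int main() {
  // A diamond-shaped DAG: both operands of Top are the same node.
  Expr Leaf = {0, 0};
  Expr Mid  = {&Leaf, &Leaf};
  Expr Top  = {&Mid, &Mid};
  std::set<const Expr *> Processed;
  walk(&Top, Processed);
  std::printf("%d nodes visited\n", Visits);  // 3 with the guard, 7 without
  return 0;
}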
@@ -1479,8 +1482,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), - ".thr_comm", this); + PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this); } // Okay, we decided to do this! Clone all the instructions in BB onto the end diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 8098b36..8795cd8 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -43,8 +43,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -84,6 +87,7 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved("scalar-evolution"); AU.addPreservedID(LoopSimplifyID); + AU.addRequired<TargetLibraryInfo>(); } bool doFinalization() { @@ -96,6 +100,9 @@ namespace { LoopInfo *LI; // Current LoopInfo DominatorTree *DT; // Dominator Tree for the current Loop. + TargetData *TD; // TargetData for constant folding. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... @@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) @@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + CurAST = new AliasSetTracker(*AA); // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); @@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) { // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to just // fold it. - if (Constant *C = ConstantFoldInstruction(&I)) { + if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); CurAST->deleteValue(&I); @@ -369,7 +380,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load"))) + if (LI->getMetadata("invariant.load")) return true; // Don't hoist loads which have may-aliased stores in loop. @@ -581,7 +592,7 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. 
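A small cleanup rides along in the LICM hunk above: Instruction::getMetadata has a StringRef overload that performs the getMDKindID lookup itself, so the explicit two-step form is redundant. An equivalence sketch (fragment; assumes this tree's headers):

#include <cassert>
#include "llvm/Instructions.h"
#include "llvm/Metadata.h"
using namespace llvm;

// Both calls resolve the "invariant.load" kind; the StringRef overload
// simply does the getMDKindID() step internally.
static bool isInvariantLoad(const LoadInst *LI) {
  MDNode *ByID =
      LI->getMetadata(LI->getContext().getMDKindID("invariant.load"));
  MDNode *ByStr = LI->getMetadata("invariant.load");
  assert(ByID == ByStr);
  return ByStr != 0;
}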
- if (Inst.isSafeToSpeculativelyExecute()) + if (isSafeToSpeculativelyExecute(&Inst)) return true; return isGuaranteedToExecute(Inst); diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt index 027634d..cee9119 100644 --- a/lib/Transforms/Scalar/LLVMBuild.txt +++ b/lib/Transforms/Scalar/LLVMBuild.txt @@ -21,4 +21,3 @@ name = Scalar parent = Transforms library_name = ScalarOpts required_libraries = Analysis Core InstCombine Support Target TransformUtils - diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index af25c5c..f0f05e6 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" @@ -43,6 +44,7 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); AU.addPreserved("scalar-evolution"); + AU.addRequired<TargetLibraryInfo>(); } }; } @@ -50,6 +52,7 @@ namespace { char LoopInstSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", "Simplify instructions in loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = &getAnalysis<LoopInfo>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, TD, DT); + Value *V = SimplifyInstruction(I, TD, TLI, DT); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ae51d5..840614e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -77,19 +77,17 @@ #include <algorithm> using namespace llvm; -namespace llvm { -cl::opt<bool> EnableNested( +static cl::opt<bool> EnableNested( "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); -cl::opt<bool> EnableRetry( - "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); +static cl::opt<bool> EnableRetry( + "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); // Temporary flag to cleanup congruent phis after LSR phi expansion. // It's currently disabled until we can determine whether it's truly useful or // not. The flag should be removed after the v3.0 release. 
-cl::opt<bool> EnablePhiElim( - "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); -} +static cl::opt<bool> EnablePhiElim( + "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); namespace { @@ -636,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) { return AccessTy; } +/// isExistingPhi - Return true if this AddRec is already a phi in its loop. +static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { + for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(AR->getType())) && + SE.getSCEV(PN) == AR) + return true; + } + return false; +} + /// DeleteTriviallyDeadInstructions - If any of the instructions is the /// specified set are trivially dead, delete them and see if this makes any of /// their operands subsequently dead. @@ -705,7 +716,8 @@ public: const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, const SmallVectorImpl<int64_t> &Offsets, - ScalarEvolution &SE, DominatorTree &DT); + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs = 0); void print(raw_ostream &OS) const; void dump() const; @@ -718,7 +730,8 @@ private: void RatePrimaryRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 16> &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT); + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs); }; } @@ -738,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg, // on other loops, and cannot be expected to change sibling loops. If the // AddRec exists, consider it's register free and leave it alone. Otherwise, // do not consider this formula at all. - // FIXME: why do we need to generate such fomulae? else if (!EnableNested || L->contains(AR->getLoop()) || (!AR->getLoop()->contains(L) && DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { - for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - if (SE.isSCEVable(PN->getType()) && - (SE.getEffectiveSCEVType(PN->getType()) == - SE.getEffectiveSCEVType(AR->getType())) && - SE.getSCEV(PN) == AR) - return; - } + if (isExistingPhi(AR, SE)) + return; + + // For !EnableNested, never rewrite IVs in other loops. if (!EnableNested) { Loose(); return; @@ -791,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg, } /// RatePrimaryRegister - Record this register in the set. If we haven't seen it -/// before, rate it. +/// before, rate it. Optional LoserRegs provides a way to declare any formula +/// that refers to one of those regs an instant loser. void Cost::RatePrimaryRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 16> &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT) { - if (Regs.insert(Reg)) + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs) { + if (LoserRegs && LoserRegs->count(Reg)) { + Loose(); + return; + } + if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); + if (isLoser()) + LoserRegs->insert(Reg); + } } void Cost::RateFormula(const Formula &F, @@ -805,14 +822,15 @@ void Cost::RateFormula(const Formula &F, const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, const SmallVectorImpl<int64_t> &Offsets, - ScalarEvolution &SE, DominatorTree &DT) { + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs) { // Tally up the registers. 
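The LoserRegs set introduced above is a negative-result cache, with a twist relative to the Processed set in the IndVarSimplify hunk: it is consulted before rating and populated only when rating actually condemns a register, so the verdict transfers across otherwise independent formulae. In miniature, with plain ints standing in for SCEV registers and a placeholder rating predicate:

#include <cassert>
#include <set>
#include <vector>

static int RateCalls;
static bool expensiveRateIsLoser(int Reg) { ++RateCalls; return Reg == 7; }

// A formula loses if any of its registers loses; condemned registers go
// into LoserRegs so later formulae skip the expensive rating entirely.
static bool formulaIsLoser(const std::vector<int> &Regs,
                           std::set<int> &LoserRegs) {
  for (size_t i = 0; i != Regs.size(); ++i) {
    if (LoserRegs.count(Regs[i]))
      return true;                 // instant loser, nothing re-rated
    if (expensiveRateIsLoser(Regs[i])) {
      LoserRegs.insert(Regs[i]);
      return true;
    }
  }
  return false;
}

int main() {
  std::set<int> LoserRegs;
  std::vector<int> F1, F2;
  F1.push_back(1); F1.push_back(7);  // rated: 1 is fine, 7 is condemned
  F2.push_back(7); F2.push_back(2);  // 7 hits the cache; 2 is never rated
  assert(formulaIsLoser(F1, LoserRegs) && formulaIsLoser(F2, LoserRegs));
  assert(RateCalls == 2);            // only regs 1 and 7, once each
  return 0;
}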
if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { Loose(); return; } - RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs); if (isLoser()) return; } @@ -823,7 +841,7 @@ void Cost::RateFormula(const Formula &F, Loose(); return; } - RatePrimaryRegister(BaseReg, Regs, L, SE, DT); + RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs); if (isLoser()) return; } @@ -1105,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) { Formulae.push_back(F); // Record registers now being used by this use. - if (F.ScaledReg) Regs.insert(F.ScaledReg); Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); return true; @@ -1116,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) { if (&F != &Formulae.back()) std::swap(F, Formulae.back()); Formulae.pop_back(); - assert(!Formulae.empty() && "LSRUse has no formulae left!"); } /// RecomputeRegs - Recompute the Regs field, and update RegUses. @@ -1389,7 +1405,6 @@ class LSRInstance { LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); -public: void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void CountRegisters(const Formula &F, size_t LUIdx); @@ -1450,6 +1465,7 @@ public: void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Pass *P); +public: LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); bool getChanged() const { return Changed; } @@ -2045,7 +2061,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() { do { const SCEV *S = Worklist.pop_back_val(); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - Strides.insert(AR->getStepRecurrence(SE)); + if (EnableNested || AR->getLoop() == L) + Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { Worklist.append(Add->op_begin(), Add->op_end()); @@ -2914,6 +2931,7 @@ LSRInstance::GenerateAllReuseFormulae() { void LSRInstance::FilterOutUndesirableDedicatedRegisters() { DenseSet<const SCEV *> VisitedRegs; SmallPtrSet<const SCEV *, 16> Regs; + SmallPtrSet<const SCEV *, 16> LoserRegs; #ifndef NDEBUG bool ChangedFormulae = false; #endif @@ -2933,46 +2951,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { FIdx != NumForms; ++FIdx) { Formula &F = LU.Formulae[FIdx]; - SmallVector<const SCEV *, 2> Key; - for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), - JE = F.BaseRegs.end(); J != JE; ++J) { - const SCEV *Reg = *J; - if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) - Key.push_back(Reg); + // Some formulas are instant losers. For example, they may depend on + // nonexistent AddRecs from other loops. These need to be filtered + // immediately, otherwise heuristics could choose them over others leading + // to an unsatisfactory solution. Passing LoserRegs into RateFormula here + // avoids the need to recompute this information across formulae using the + // same bad AddRec. Passing LoserRegs is also essential unless we remove + // the corresponding bad register from the Regs set. + Cost CostF; + Regs.clear(); + CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, + &LoserRegs); + if (CostF.isLoser()) { + // During initial formula generation, undesirable formulae are generated + // by uses within other loops that have some non-trivial address mode or + // use the postinc form of the IV. 
LSR needs to provide these formulae + // as the basis of rediscovering the desired formula that uses an AddRec + // corresponding to the existing phi. Once all formulae have been + // generated, these initial losers may be pruned. + DEBUG(dbgs() << " Filtering loser "; F.print(dbgs()); + dbgs() << "\n"); } - if (F.ScaledReg && - RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) - Key.push_back(F.ScaledReg); - // Unstable sort by host order ok, because this is only used for - // uniquifying. - std::sort(Key.begin(), Key.end()); - - std::pair<BestFormulaeTy::const_iterator, bool> P = - BestFormulae.insert(std::make_pair(Key, FIdx)); - if (!P.second) { + else { + SmallVector<const SCEV *, 2> Key; + for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), + JE = F.BaseRegs.end(); J != JE; ++J) { + const SCEV *Reg = *J; + if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) + Key.push_back(Reg); + } + if (F.ScaledReg && + RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) + Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for + // uniquifying. + std::sort(Key.begin(), Key.end()); + + std::pair<BestFormulaeTy::const_iterator, bool> P = + BestFormulae.insert(std::make_pair(Key, FIdx)); + if (P.second) + continue; + Formula &Best = LU.Formulae[P.first->second]; - Cost CostF; - CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT); - Regs.clear(); Cost CostBest; - CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); Regs.clear(); + CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); if (CostF < CostBest) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" " in favor of formula "; Best.print(dbgs()); dbgs() << '\n'); + } #ifndef NDEBUG - ChangedFormulae = true; + ChangedFormulae = true; #endif - LU.DeleteFormula(F); - --FIdx; - --NumForms; - Any = true; - continue; - } + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; } // Now that we've filtered out some formulae, recompute the Regs set. diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 37f4c2c..22dbfe3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); -// Temporary flag to be removed in 3.0 static cl::opt<bool> -NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden, - cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling")); +UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, + cl::desc("Unroll loops with run-time trip counts")); namespace { class LoopUnroll : public LoopPass { @@ -68,6 +67,10 @@ namespace { // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; + // Default unroll count for loops with run-time trip count if + // -unroll-count is not set + static const unsigned UnrollRuntimeCount = 8; + unsigned CurrentCount; unsigned CurrentThreshold; bool CurrentAllowPartial; @@ -148,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Find trip count and trip multiple if count is not available unsigned TripCount = 0; unsigned TripMultiple = 1; - if (!NoSCEVUnroll) { - // Find "latch trip count". 
UnrollLoop assumes that control cannot exit - // via the loop latch on any iteration prior to TripCount. The loop may exit - // early via an earlier branch. - BasicBlock *LatchBlock = L->getLoopLatch(); - if (LatchBlock) { - TripCount = SE->getSmallConstantTripCount(L, LatchBlock); - TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); - } - } - else { - TripCount = L->getSmallConstantTripCount(); - if (TripCount == 0) - TripMultiple = L->getSmallConstantTripMultiple(); + // Find "latch trip count". UnrollLoop assumes that control cannot exit + // via the loop latch on any iteration prior to TripCount. The loop may exit + // early via an earlier branch. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + TripCount = SE->getSmallConstantTripCount(L, LatchBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); } - // Automatically select an unroll count. + // Use a default unroll-count if the user doesn't specify a value + // and the trip count is a run-time value. The default is different + // for run-time or compile-time trip count loops. unsigned Count = CurrentCount; + if (UnrollRuntime && CurrentCount == 0 && TripCount == 0) + Count = UnrollRuntimeCount; + if (Count == 0) { // Conservative heuristic: if we know the trip count, see if we can // completely unroll (subject to the threshold, checked below); otherwise @@ -189,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count << " because size: " << Size << ">" << Threshold << "\n"); - if (!CurrentAllowPartial) { + if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } - // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = Threshold / LoopSize; - while (Count != 0 && TripCount%Count != 0) { - Count--; + if (TripCount) { + // Reduce unroll count to be modulo of TripCount for partial unrolling + Count = CurrentThreshold / LoopSize; + while (Count != 0 && TripCount%Count != 0) + Count--; + } + else if (UnrollRuntime) { + // Reduce unroll count to be a lower power-of-two value + while (Count != 0 && Size > CurrentThreshold) { + Count >>= 1; + Size = LoopSize*Count; + } } if (Count < 2) { DEBUG(dbgs() << " could not unroll partially\n"); @@ -208,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM)) return false; return true; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 458949c..a2d0e98 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -71,7 +71,9 @@ namespace { // LoopProcessWorklist - Used to check if second loop needs processing // after RewriteLoopBodyWithConditionConstant rewrites first loop. std::vector<Loop*> LoopProcessWorklist; - SmallPtrSet<Value *,8> UnswitchedVals; + + // FIXME: Consider custom class for this. + std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals; bool OptimizeForSize; bool redoLoop; @@ -117,7 +119,15 @@ namespace { private: virtual void releaseMemory() { - UnswitchedVals.clear(); + // We need to forget about all switches in the current loop. 
+      // FIXME: Do this better than enumerating all blocks in the loop
+      // and checking whether each terminator is a switch instruction.
+      for (Loop::block_iterator I = currentLoop->block_begin(),
+           E = currentLoop->block_end(); I != E; ++I) {
+        SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
+        if (SI)
+          UnswitchedVals.erase(SI);
+      }
     }
 
     /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -128,6 +138,12 @@ namespace {
       if (I != LoopProcessWorklist.end())
         LoopProcessWorklist.erase(I);
     }
+
+    /// For new loop switches we clone info about values that were
+    /// already unswitched and have redundant successors.
+    /// Note that new loop data is stored inside the VMap.
+    void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+                             const BasicBlock* SrcBB);
 
     void initLoopData() {
       loopHeader = currentLoop->getHeader();
@@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() {
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
       Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
                                              currentLoop, Changed);
-      if (LoopCond && SI->getNumCases() > 1) {
+      unsigned NumCases = SI->getNumCases();
+      if (LoopCond && NumCases > 1) {
         // Find a value to unswitch on:
         // FIXME: this should choose the most expensive case!
         // FIXME: scan for a case with a non-critical edge?
-        Constant *UnswitchVal = SI->getCaseValue(1);
+        Constant *UnswitchVal = NULL;
+
         // Do not process the same value again and again.
-        if (!UnswitchedVals.insert(UnswitchVal))
+        // At this point we have some cases already unswitched and
+        // some not yet unswitched. Let's find the first not yet unswitched one.
+        for (unsigned i = 1; i < NumCases; ++i) {
+          Constant* UnswitchValCandidate = SI->getCaseValue(i);
+          if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
+            UnswitchVal = UnswitchValCandidate;
+            break;
+          }
+        }
+
+        if (!UnswitchVal)
           continue;
 
         if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() {
   return Changed;
 }
 
+/// For new loop switches we clone info about values that were
+/// already unswitched and have redundant successors.
+/// Note that new loop data is stored inside the VMap.
+void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+                                       const BasicBlock* SrcBB) {
+
+  const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
+  if (SI && UnswitchedVals.count(SI)) {
+    // Don't clone a totally simplified switch.
+    if (isa<Constant>(SI->getCondition()))
+      return;
+    Value* I = VMap.lookup(SI);
+    assert(I && "All instructions that are in SrcBB must be in VMap.");
+    UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
+  }
+}
+
 /// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
 /// loop with no side effects (including infinite loops).
 ///
@@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
     // Check to see if a successor of the switch is guaranteed to go to the
     // latch block or exit through a one exit block without having any
     // side-effects. If so, determine the value of Cond that causes it to do
-    // this. Note that we can't trivially unswitch on the default case.
-    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
-      if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+    // this.
+    // Note that we can't trivially unswitch on the default case or
+    // on already unswitched cases.
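The bookkeeping above boils down to: for every switch, remember the case values already unswitched and pick the first fresh one; the loop that follows implements exactly that search. A standalone sketch with standard containers standing in for the LLVM types:

    #include <map>
    #include <set>
    #include <vector>

    // A "switch" is reduced to its list of case values here.
    typedef std::vector<int> SwitchCases;

    std::map<const SwitchCases *, std::set<int> > UnswitchedVals;

    // Return the first case value not unswitched yet, or -1 if none is left.
    int pickUnswitchCandidate(const SwitchCases &SI) {
      std::set<int> &Done = UnswitchedVals[&SI];
      for (size_t i = 0; i < SI.size(); ++i)
        if (!Done.count(SI[i]))
          return SI[i];
      return -1; // every case was handled already
    }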
+    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+      BasicBlock* LoopExitCandidate;
+      if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
                                                SI->getSuccessor(i)))) {
         // Okay, we found a trivial case, remember the value that is trivial.
-        if (Val) *Val = SI->getCaseValue(i);
+        ConstantInt* CaseVal = SI->getCaseValue(i);
+
+        // Check that it was not unswitched before, since already-unswitched
+        // trivial values look trivial too.
+        if (UnswitchedVals[SI].count(CaseVal))
+          continue;
+        LoopExitBB = LoopExitCandidate;
+        if (Val) *Val = CaseVal;
         break;
       }
+    }
   }
 
   // If we didn't find a single unique LoopExit block, or if the loop exit block
@@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
   // expansion, and the number of basic blocks, to avoid loops with
   // large numbers of branches which cause loop unswitching to go crazy.
   // This is a very ad-hoc heuristic.
-  if (Metrics.NumInsts > Threshold ||
-      Metrics.NumBlocks * 5 > Threshold ||
+
+  unsigned NumUnswitched =
+    (NumSwitches + NumBranches) + 1 /*take the current iteration into account*/;
+
+  unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
+  unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
+
+  if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
       Metrics.containsIndirectBr || Metrics.isRecursive) {
     DEBUG(dbgs() << "NOT unswitching loop %"
           << currentLoop->getHeader()->getName() << ", cost too high: "
@@ -565,8 +627,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
   // Although SplitBlockPredecessors doesn't preserve loop-simplify in
   // general, if we call it on all predecessors of all exits then it does.
   if (!ExitBlock->isLandingPad()) {
-    SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
-                           ".us-lcssa", this);
+    SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
   } else {
     SmallVector<BasicBlock*, 2> NewBBs;
     SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -621,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
   ValueToValueMapTy VMap;
   for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
     BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+    // Inherit simplified switch info for NewBB.
+    // We needn't pass NewBB since its instructions are already contained
+    // inside the VMap.
+    CloneUnswitchedVals(VMap, LoopBlocks[i]);
+
     NewBlocks.push_back(NewBB);
     VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
     LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -907,9 +974,13 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
       Instruction *U = dyn_cast<Instruction>(*UI);
       if (!U || !L->contains(U))
         continue;
-      U->replaceUsesOfWith(LIC, Replacement);
       Worklist.push_back(U);
     }
+
+    for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+         UI != Worklist.end(); ++UI)
+      (*UI)->replaceUsesOfWith(LIC, Replacement);
+
     SimplifyCode(Worklist, L);
     return;
   }
@@ -942,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
     BasicBlock *Switch = SI->getParent();
     BasicBlock *SISucc = SI->getSuccessor(DeadCase);
     BasicBlock *Latch = L->getLoopLatch();
+
+    UnswitchedVals[SI].insert(Val);
+
     if (!SI->findCaseDest(SISucc)) continue;  // Edge is critical.
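One detail worth calling out from the RewriteLoopBodyWithConditionConstant hunk above: the rewrite was moved out of the use-list walk, because replaceUsesOfWith() edits the very list being iterated. A hedged sketch of the collect-then-rewrite shape (toy types, not the LLVM use-list machinery):

    #include <cstdio>
    #include <vector>

    struct Use { int *Target; };

    int main() {
      int OldVal = 0, NewVal = 1;
      std::vector<Use> Uses(3);
      for (size_t i = 0; i < Uses.size(); ++i)
        Uses[i].Target = &OldVal;

      // Phase 1: snapshot the users of OldVal.
      std::vector<Use *> Worklist;
      for (size_t i = 0; i < Uses.size(); ++i)
        if (Uses[i].Target == &OldVal)
          Worklist.push_back(&Uses[i]);

      // Phase 2: rewrite from the snapshot, so the traversal source is
      // never mutated mid-walk.
      for (size_t i = 0; i < Worklist.size(); ++i)
        Worklist[i]->Target = &NewVal;

      std::printf("retargeted: %d uses\n", (int)Worklist.size());
      return 0;
    }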
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1017,7 +1091,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
     // See if instruction simplification can hack this up. This is common for
     // things like "select false, X, Y" after unswitching made the condition be
     // 'false'.
-    if (Value *V = SimplifyInstruction(I, 0, DT))
+    if (Value *V = SimplifyInstruction(I, 0, 0, DT))
       if (LI->replacementPreservesLCSSAForm(I, V)) {
         ReplaceUsesOfWith(I, V, Worklist, L, LPM);
         continue;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9e4f51f..7335626 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
 } // end anon namespace
 
 bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
-  // If we found more than 8 stores to merge or 64 bytes, use memset.
-  if (TheStores.size() >= 8 || End-Start >= 64) return true;
+  // If we found more than 4 stores to merge or 16 bytes, use memset.
+  if (TheStores.size() >= 4 || End-Start >= 16) return true;
 
   // If there is nothing to merge, don't do anything.
   if (TheStores.size() < 2) return false;
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 80f5f01..8e9449f 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
       Arg->hasNestAttr() ||
       Arg->hasStructRetAttr())
     return false;
-  // Only consider values with pointer types, and not function pointers.
+  // Only consider values with pointer types.
+  // It seems intuitive to exclude function pointer types as well, since
+  // functions are never reference-counted; however, clang occasionally
+  // bitcasts reference-counted pointers to function-pointer type
+  // temporarily.
   PointerType *Ty = dyn_cast<PointerType>(Op->getType());
-  if (!Ty || isa<FunctionType>(Ty->getElementType()))
+  if (!Ty)
     return false;
   // Conservatively assume anything else is a potential use.
   return true;
@@ -896,8 +900,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
 
 STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
 STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1165,6 +1170,7 @@ namespace {
     /// Partial - True if we've seen an opportunity for partial RR elimination,
     /// such as pushing calls into a CFG triangle or into one side of a
     /// CFG diamond.
+    /// TODO: Consider moving this to PtrState.
     bool Partial;
 
     /// ReleaseMetadata - If the Calls are objc_release calls and they all have
@@ -1251,16 +1257,6 @@ namespace {
       Seq = NewSeq;
     }
 
-    void SetSeqToRelease(MDNode *M) {
-      if (Seq == S_None || Seq == S_Use) {
-        Seq = M ? S_MovableRelease : S_Release;
-        RRI.ReleaseMetadata = M;
-      } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
-        Seq = S_Release;
-        RRI.ReleaseMetadata = 0;
-      }
-    }
-
     Sequence GetSeq() const {
       return Seq;
     }
@@ -1488,7 +1484,7 @@ namespace {
     /// metadata.
unsigned ImpreciseReleaseMDKind; - /// CopyOnEscape - The Metadata Kind for clang.arc.copy_on_escape + /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape /// metadata. unsigned CopyOnEscapeMDKind; @@ -2255,6 +2251,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } case S_CanRelease: { const Value *Arg = I->first; @@ -2289,6 +2286,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } } } @@ -2350,8 +2348,11 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) NestingDetected = true; - S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); S.RRI.clear(); + + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release); + S.RRI.ReleaseMetadata = ReleaseMetadata; S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); @@ -2494,18 +2495,16 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, if (Pred == BB) continue; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - assert(I != BBStates.end()); // If we haven't seen this node yet, then we've found a CFG cycle. // Be optimistic here; it's CheckForCFGHazards' job detect trouble. - if (!I->second.isVisitedTopDown()) + if (I == BBStates.end() || !I->second.isVisitedTopDown()) continue; MyStates.InitFromPred(I->second); while (PI != PE) { Pred = *PI++; if (Pred != BB) { I = BBStates.find(Pred); - assert(I != BBStates.end()); - if (I->second.isVisitedTopDown()) + if (I == BBStates.end() || I->second.isVisitedTopDown()) MyStates.MergePred(I->second); } } @@ -2661,49 +2660,106 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, return NestingDetected; } -// Visit - Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { - // Use reverse-postorder on the reverse CFG for bottom-up, because we - // magically know that loops will be well behaved, i.e. they won't repeatedly - // call retain on a single pointer without doing a release. We can't use - // ReversePostOrderTraversal here because we want to walk up from each - // function exit point. +static void +ComputePostOrders(Function &F, + SmallVectorImpl<BasicBlock *> &PostOrder, + SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) { + /// Backedges - Backedges detected in the DFS. These edges will be + /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be + /// traversed in the desired order. + DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges; + + /// Visited - The visited set, for doing DFS walks. SmallPtrSet<BasicBlock *, 16> Visited; - SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack; - SmallVector<BasicBlock *, 16> Order; + + // Do DFS, computing the PostOrder. 
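Before the patch's DFS code continues below, here is a compact, self-contained model of the technique ComputePostOrders uses: an explicit-stack DFS that records a postorder and flags edges into the current stack as backedges (plain adjacency lists stand in for the CFG; this is an illustration, not the pass's code):

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    int main() {
      // Tiny CFG: 0 -> 1 -> 2, and 2 -> 1 closes a loop.
      std::vector<std::vector<int> > Succs(3);
      Succs[0].push_back(1);
      Succs[1].push_back(2);
      Succs[2].push_back(1);

      std::set<int> Visited, OnStack;
      std::set<std::pair<int, int> > Backedges;
      std::vector<int> PostOrder;
      // Stack entries: (node, index of next successor to try).
      std::vector<std::pair<int, size_t> > Stack;
      Stack.push_back(std::make_pair(0, 0));
      Visited.insert(0);
      OnStack.insert(0);
      while (!Stack.empty()) {
        int N = Stack.back().first;
        if (Stack.back().second < Succs[N].size()) {
          int S = Succs[N][Stack.back().second++];
          if (Visited.insert(S).second) {
            Stack.push_back(std::make_pair(S, 0)); // descend
            OnStack.insert(S);
          } else if (OnStack.count(S)) {
            Backedges.insert(std::make_pair(N, S)); // edge into the stack
          }
        } else {
          OnStack.erase(N);            // all successors done: emit N
          PostOrder.push_back(N);
          Stack.pop_back();
        }
      }
      for (size_t i = 0; i < PostOrder.size(); ++i)
        std::printf("%d ", PostOrder[i]); // prints: 2 1 0
      return 0;
    }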
+ SmallPtrSet<BasicBlock *, 16> OnStack; + SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; + BasicBlock *EntryBB = &F.getEntryBlock(); + SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB))); + Visited.insert(EntryBB); + OnStack.insert(EntryBB); + do { + dfs_next_succ: + succ_iterator End = succ_end(SuccStack.back().first); + while (SuccStack.back().second != End) { + BasicBlock *BB = *SuccStack.back().second++; + if (Visited.insert(BB)) { + SuccStack.push_back(std::make_pair(BB, succ_begin(BB))); + OnStack.insert(BB); + goto dfs_next_succ; + } + if (OnStack.count(BB)) + Backedges.insert(std::make_pair(SuccStack.back().first, BB)); + } + OnStack.erase(SuccStack.back().first); + PostOrder.push_back(SuccStack.pop_back_val().first); + } while (!SuccStack.empty()); + + Visited.clear(); + + // Compute the exits, which are the starting points for reverse-CFG DFS. + SmallVector<BasicBlock *, 4> Exits; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *BB = I; if (BB->getTerminator()->getNumSuccessors() == 0) - Stack.push_back(std::make_pair(BB, pred_begin(BB))); + Exits.push_back(BB); } - while (!Stack.empty()) { - pred_iterator End = pred_end(Stack.back().first); - while (Stack.back().second != End) { - BasicBlock *BB = *Stack.back().second++; - if (Visited.insert(BB)) - Stack.push_back(std::make_pair(BB, pred_begin(BB))); - } - Order.push_back(Stack.pop_back_val().first); + + // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. + SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack; + for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end(); + I != E; ++I) { + BasicBlock *ExitBB = *I; + PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB))); + Visited.insert(ExitBB); + while (!PredStack.empty()) { + reverse_dfs_next_succ: + pred_iterator End = pred_end(PredStack.back().first); + while (PredStack.back().second != End) { + BasicBlock *BB = *PredStack.back().second++; + // Skip backedges detected in the forward-CFG DFS. + if (Backedges.count(std::make_pair(BB, PredStack.back().first))) + continue; + if (Visited.insert(BB)) { + PredStack.push_back(std::make_pair(BB, pred_begin(BB))); + goto reverse_dfs_next_succ; + } + } + ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); + } } +} + +// Visit - Visit the function both top-down and bottom-up. +bool +ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { + + // Use reverse-postorder traversals, because we magically know that loops + // will be well behaved, i.e. they won't repeatedly call retain on a single + // pointer without doing a release. We can't use the ReversePostOrderTraversal + // class here because we want the reverse-CFG postorder to consider each + // function exit point, and we want to ignore selected cycle edges. + SmallVector<BasicBlock *, 16> PostOrder; + SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; + ComputePostOrders(F, PostOrder, ReverseCFGPostOrder); + + // Use reverse-postorder on the reverse CFG for bottom-up. 
bool BottomUpNestingDetected = false; for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - Order.rbegin(), E = Order.rend(); I != E; ++I) { - BasicBlock *BB = *I; - BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); - } + ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend(); + I != E; ++I) + BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains); - // Use regular reverse-postorder for top-down. + // Use reverse-postorder for top-down. bool TopDownNestingDetected = false; - typedef ReversePostOrderTraversal<Function *> RPOTType; - RPOTType RPOT(&F); - for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - BasicBlock *BB = *I; - TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases); - } + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = + PostOrder.rbegin(), E = PostOrder.rend(); + I != E; ++I) + TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases); return TopDownNestingDetected && BottomUpNestingDetected; } @@ -3139,7 +3195,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { UE = Alloca->use_end(); UI != UE; ) { CallInst *UserInst = cast<CallInst>(*UI++); if (!UserInst->use_empty()) - UserInst->replaceAllUsesWith(UserInst->getOperand(1)); + UserInst->replaceAllUsesWith(UserInst->getArgOperand(0)); UserInst->eraseFromParent(); } Alloca->eraseFromParent(); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index f6762ad..e4cb55c 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -156,6 +157,7 @@ namespace { /// class SCCPSolver : public InstVisitor<SCCPSolver> { const TargetData *TD; + const TargetLibraryInfo *TLI; SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. @@ -206,7 +208,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { typedef std::pair<BasicBlock*, BasicBlock*> Edge; DenseSet<Edge> KnownFeasibleEdges; public: - SCCPSolver(const TargetData *td) : TD(td) {} + SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli) + : TD(td), TLI(tli) {} /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -1125,7 +1128,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands)) + if (Constant *C = ConstantFoldCall(F, Operands, TLI)) return markConstant(I, C); } @@ -1517,6 +1520,9 @@ namespace { /// Sparse Conditional Constant Propagator. 
/// struct SCCP : public FunctionPass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfo>(); + } static char ID; // Pass identification, replacement for typeid SCCP() : FunctionPass(ID) { initializeSCCPPass(*PassRegistry::getPassRegistry()); @@ -1569,7 +1575,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) { // bool SCCP::runOnFunction(Function &F) { DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); - SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + SCCPSolver Solver(TD, TLI); // Mark the first block of the function as being executable. Solver.MarkBlockExecutable(F.begin()); @@ -1641,6 +1649,9 @@ namespace { /// Constant Propagation. /// struct IPSCCP : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfo>(); + } static char ID; IPSCCP() : ModulePass(ID) { initializeIPSCCPPass(*PassRegistry::getPassRegistry()); @@ -1650,7 +1661,11 @@ namespace { } // end anonymous namespace char IPSCCP::ID = 0; -INITIALIZE_PASS(IPSCCP, "ipsccp", +INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp", + "Interprocedural Sparse Conditional Constant Propagation", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(IPSCCP, "ipsccp", "Interprocedural Sparse Conditional Constant Propagation", false, false) @@ -1689,7 +1704,9 @@ static bool AddressIsTaken(const GlobalValue *GV) { } bool IPSCCP::runOnModule(Module &M) { - SCCPSolver Solver(getAnalysisIfAvailable<TargetData>()); + const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + SCCPSolver Solver(TD, TLI); // AddressTakenFunctions - This set keeps track of the address-taken functions // that are in the input. As IPSCCP runs through and simplifies code, diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 4b14efc..bc70c51 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); + if (!GEP->getPointerOperandType()->isPointerTy()) + return false; uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), Indices); // See if all uses can be converted. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 6e169de..f3184ec 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -999,7 +999,7 @@ struct FFSOpt : public LibCallOptimization { Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); - Value *V = B.CreateCall(F, Op, "cttz"); + Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1293,7 +1293,8 @@ struct FWriteOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), 0); // If this is writing one byte, turn it into fputc. - if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) + // This optimisation is only valid, if the return value is unused. 
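To see why the use_empty() guard in the hunk that follows is needed: fwrite and fputc signal success with different values, so the rewrite is only behavior-preserving when the call's result is ignored. A small illustration:

    #include <cstdio>

    int main() {
      const char s[] = "x";
      // fwrite(S,1,1,F) returns the number of items written: 1 on success.
      size_t n = std::fwrite(s, 1, 1, stdout);
      // fputc(S[0],F) returns the byte written (here 'x'), or EOF on error,
      // so code that inspects the result would observe a different value.
      int c = std::fputc(s[0], stdout);
      return (n == 1 && c == 'x') ? 0 : 1;
    }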
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); EmitFPutC(Char, CI->getArgOperand(3), B, TD); return ConstantInt::get(CI->getType(), 1); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index c83f56c..ef65c0a 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" @@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst, if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!Inst->isSafeToSpeculativelyExecute()) { + if (!isSafeToSpeculativelyExecute(Inst)) { DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); return false; } diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 8e5a1eb..d831452 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, // Check to see if this value is already used in the memory instruction's // block. If so, it's already live into the block at the very least, so we // can reasonably fold it. - BasicBlock *MemBB = MemoryInst->getParent(); - for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end(); - UI != E; ++UI) - // We know that uses of arguments and instructions have to be instructions. - if (cast<Instruction>(*UI)->getParent() == MemBB) - return true; - - return false; + return Val->isUsedInBasicBlock(MemoryInst->getParent()); } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index a7f9efd..ef4a473 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -453,9 +453,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - BasicBlock *const *Preds, - unsigned NumPreds, const char *Suffix, - Pass *P) { + ArrayRef<BasicBlock*> Preds, + const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); @@ -464,7 +463,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. - for (unsigned i = 0; i != NumPreds; ++i) { + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. @@ -477,7 +476,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. - if (NumPreds == 0) { + if (Preds.size() == 0) { // Insert dummy values as the incoming value. 
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); @@ -486,12 +485,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), - P, HasLoopExit); + UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI, - P, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit); return NewBB; } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index c052910..f752d79 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = - SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), - "split", P); + SplitBlockPredecessors(Exit, Preds, "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 6d5432d..d96f59c 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMTransformUtils Local.cpp LoopSimplify.cpp LoopUnroll.cpp + LoopUnrollRuntime.cpp LowerExpectIntrinsic.cpp LowerInvoke.cpp LowerSwitch.cpp @@ -28,11 +29,3 @@ add_llvm_library(LLVMTransformUtils Utils.cpp ValueMapper.cpp ) - -add_llvm_library_dependencies(LLVMTransformUtils - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTarget - LLVMipa - ) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index dd4a659..f89e1b1 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -924,40 +924,44 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { return false; } - // Find the personality function used by the landing pads of the caller. If it - // exists, then check to see that it matches the personality function used in - // the callee. - for (Function::const_iterator - I = Caller->begin(), E = Caller->end(); I != E; ++I) + // Get the personality function from the callee if it contains a landing pad. + Value *CalleePersonality = 0; + for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); + I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); - // FIXME: This 'isa' here should become go away once the new EH system is - // in place. - if (!isa<LandingPadInst>(BB->getFirstNonPHI())) - continue; - const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); - const Value *CallerPersFn = LP->getPersonalityFn(); - - // If the personality functions match, then we can perform the - // inlining. Otherwise, we can't inline. - // TODO: This isn't 100% true. Some personality functions are proper - // supersets of others and can be used in place of the other. 
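This hunk restructures the personality check into a find-then-verify shape: first locate the callee's personality function (if any), then check the caller's landing pads against it. A rough standalone sketch of that shape (Personality is a stand-in handle; the real code walks invoke unwind destinations):

    #include <vector>

    typedef const void *Personality; // stand-in handle; 0 means "none"

    bool personalitiesCompatible(const std::vector<Personality> &CalleeLPs,
                                 const std::vector<Personality> &CallerLPs) {
      // Step 1: find the callee's personality, if it has landing pads at all.
      Personality CalleePersonality = CalleeLPs.empty() ? 0 : CalleeLPs.front();
      if (!CalleePersonality)
        return true; // callee has no landing pads: nothing can conflict

      // Step 2: every landing pad in the caller must agree with it.
      for (size_t i = 0, e = CallerLPs.size(); i != e; ++i)
        if (CallerLPs[i] != CalleePersonality)
          return false; // mismatch blocks inlining
      return true;
    }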
-  for (Function::const_iterator
-         I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
-    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
-      const BasicBlock *BB = II->getUnwindDest();
-      // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
-      // the new EH system is in place.
-      if (const LandingPadInst *LP =
-            dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
-        if (CallerPersFn != LP->getPersonalityFn())
-          return false;
-      break;
-    }
-
+      // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+      // the new EH system is in place.
+      if (const LandingPadInst *LP =
+            dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+        CalleePersonality = LP->getPersonalityFn();
       break;
     }
 
+  // Find the personality function used by the landing pads of the caller. If it
+  // exists, then check to see that it matches the personality function used in
+  // the callee.
+  if (CalleePersonality)
+    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+         I != E; ++I)
+      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+        const BasicBlock *BB = II->getUnwindDest();
+        // FIXME: This 'isa' here should go away once the new EH system
+        // is in place.
+        if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+          continue;
+        const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+
+        // If the personality functions match, then we can perform the
+        // inlining. Otherwise, we can't inline.
+        // TODO: This isn't 100% true. Some personality functions are proper
+        // supersets of others and can be used in place of the other.
+        if (LP->getPersonalityFn() != CalleePersonality)
+          return false;
+
+        break;
+      }
+
   // Get an iterator to the last basic block in the function, which will have
   // the new function inlined after it.
// diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt index dea7b02..88b2ffe 100644 --- a/lib/Transforms/Utils/LLVMBuild.txt +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = TransformUtils parent = Transforms required_libraries = Analysis Core IPA Support Target - diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 134ab71..4dd93cf 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -494,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { if (Succ->getSinglePredecessor()) return true; // Make a list of the predecessors of BB - typedef SmallPtrSet<BasicBlock*, 16> BlockSet; - BlockSet BBPreds(pred_begin(BB), pred_end(BB)); - - // Use that list to make another list of common predecessors of BB and Succ - BlockSet CommonPreds; - for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) { - BasicBlock *P = *PI; - if (BBPreds.count(P)) - CommonPreds.insert(P); - } + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); - // Shortcut, if there are no common predecessors, merging is always safe - if (CommonPreds.empty()) - return true; - // Look at all the phi nodes in Succ, to see if they present a conflict when // merging these blocks for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { @@ -520,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // merge the phi nodes and then the blocks can still be merged PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); if (BBPN && BBPN->getParent() == BB) { - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { - if (BBPN->getIncomingValueForBlock(*PI) - != PN->getIncomingValueForBlock(*PI)) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " - << (*PI)->getName() << "\n"); + << IBB->getName() << "\n"); return false; } } } else { Value* Val = PN->getIncomingValueForBlock(BB); - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { // See if the incoming value for the common predecessor is equal to the // one for BB, in which case this phi node will not prevent the merging // of the block. - if (Val != PN->getIncomingValueForBlock(*PI)) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getName() << "\n"); + << "predecessor " << IBB->getName() << "\n"); return false; } } @@ -748,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // If there is a large requested alignment and we can, bump up the alignment // of the global. if (GV->isDeclaration()) return Align; + // If the memory we set aside for the global may not be the memory used by + // the final program then it is impossible for us to reliably enforce the + // preferred alignment. 
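The isWeakForLinker() bail-out added below exists because a weak global's storage may come from a different translation unit whose copy kept the original alignment; raising the alignment locally would be a promise the linker cannot keep. One side of a hypothetical two-TU scenario (GCC/Clang attribute syntax):

    #include <cstdint>
    #include <cstdio>

    // Both TUs define g_shared as weak with 4-byte alignment. If an optimizer
    // bumped only this copy to aligned(16), the linker could still pick the
    // other TU's definition, and the 16-byte assumption would be false.
    __attribute__((weak, aligned(4))) int g_shared = 0;

    int main() {
      std::uintptr_t Addr = reinterpret_cast<std::uintptr_t>(&g_shared);
      std::printf("g_shared 16-byte aligned: %d\n", (Addr % 16) == 0 ? 1 : 0);
      return 0;
    }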
+ if (GV->isWeakForLinker()) return Align; if (GV->getAlignment() >= PrefAlign) return GV->getAlignment(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index cbd54a8..4376265 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -99,7 +99,8 @@ namespace { bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); BasicBlock *InsertPreheaderForLoop(Loop *L); - Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); + Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader); BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); void PlaceSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl<BasicBlock*> &SplitPreds, @@ -240,7 +241,7 @@ ReprocessLoop: // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { - if (SeparateNestedLoop(L, LPM)) { + if (SeparateNestedLoop(L, LPM, Preheader)) { ++NumNested; // This is a big restructuring change, reprocess the whole loop. Changed = true; @@ -265,7 +266,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { } // Split out the loop pre-header. - BasicBlock *NewBB = - SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), - ".preheader", this); + BasicBlock *PreheaderBB; + if (!Header->isLandingPad()) { + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + this); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", + ".split-lp", this, NewBBs); + PreheaderBB = NewBBs[0]; + } - NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); - DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName() - << "\n"); + PreheaderBB->getTerminator()->setDebugLoc( + Header->getFirstNonPHI()->getDebugLoc()); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); + PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); - return NewBB; + return PreheaderBB; } /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit @@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { this, NewBBs); NewExitBB = NewBBs[0]; } else { - NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], - LoopBlocks.size(), ".loopexit", - this); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this); } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " @@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { // This is a degenerate PHI already, don't modify it! 
PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -529,7 +536,17 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. /// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader) { + // Don't try to separate loops without a preheader (this excludes + // loop headers which are targeted by an indirectbr). + if (!Preheader) + return 0; + + // The header is not a landing pad; preheader insertion should ensure this. + assert(!L->getHeader()->isLandingPad() && + "Can't insert backedge to landing pad"); + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); if (PN == 0) return 0; // No known way to partition. @@ -539,13 +556,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SmallVector<BasicBlock*, 8> OuterLoopPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) { - // We can't split indirectbr edges. - if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) - return 0; - + !L->contains(PN->getIncomingBlock(i))) OuterLoopPreds.push_back(PN->getIncomingBlock(i)); - } DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); @@ -556,9 +568,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SE->forgetLoop(L); BasicBlock *Header = L->getHeader(); - BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], - OuterLoopPreds.size(), - ".outer", this); + BasicBlock *NewBB = + SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -640,6 +651,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { if (!Preheader) return 0; + // The header is not a landing pad; preheader insertion should ensure this. + assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 62e4fa2..b96f14b 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { + bool AllowRuntime, unsigned TripMultiple, + LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -165,12 +166,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, return false; } - // Notify ScalarEvolution that the loop will be substantially changed, - // if not outright eliminated. 
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); - if (SE) - SE->forgetLoop(L); - if (TripCount != 0) DEBUG(dbgs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) @@ -188,6 +183,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; + // We assume a run-time trip count if the compiler cannot + // figure out the loop trip count and the unroll-runtime + // flag is specified. + bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); + + if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM)) + return false; + + // Notify ScalarEvolution that the loop will be substantially changed, + // if not outright eliminated. + ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>(); + if (SE) + SE->forgetLoop(L); + // If we know the trip count, we know the multiple... unsigned BreakoutTrip = 0; if (TripCount != 0) { @@ -209,6 +218,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + } else if (RuntimeTripCount) { + DEBUG(dbgs() << " with run-time trip count"); } DEBUG(dbgs() << "!\n"); } @@ -332,6 +343,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, BasicBlock *Dest = Headers[j]; bool NeedConditional = true; + if (RuntimeTripCount && j != 0) { + NeedConditional = false; + } + // For a complete unroll, make the last iteration end with a branch // to the exit block. if (CompletelyUnroll && j == 0) { diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp new file mode 100644 index 0000000..b351852 --- /dev/null +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -0,0 +1,374 @@ +//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some loop unrolling utilities for loops with run-time +// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time +// trip counts. +// +// The functions in this file are used to generate extra code when the +// run-time trip count modulo the unroll factor is not 0. When this is the +// case, we need to generate code to execute these 'left over' iterations. +// +// The current strategy generates an if-then-else sequence prior to the +// unrolled loop to execute the 'left over' iterations. Other strategies +// include generate a loop before or after the unrolled loop. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "loop-unroll" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/BasicBlock.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <algorithm> + +using namespace llvm; + +STATISTIC(NumRuntimeUnrolled, + "Number of loops unrolled with run-time trip counts"); + +/// Connect the unrolling prolog code to the original loop. +/// The unrolling prolog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// unroll count is non-zero. +/// +/// This function performs the following: +/// - Create PHI nodes at prolog end block to combine values +/// that exit the prolog code and jump around the prolog. +/// - Add a PHI operand to a PHI node at the loop exit block +/// for values that exit the prolog and go around the loop. +/// - Branch around the original loop if the trip count is less +/// than the unroll factor. +/// +static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, + BasicBlock *LastPrologBB, BasicBlock *PrologEnd, + BasicBlock *OrigPH, BasicBlock *NewPH, + ValueToValueMapTy &LVMap, Pass *P) { + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch != 0 && "Loop must have a latch"); + + // Create a PHI node for each outgoing value from the original loop + // (which means it is an outgoing value from the prolog code too). + // The new PHI node is inserted in the prolog end basic block. + // The new PHI name is added as an operand of a PHI node in either + // the loop header or the loop exit block. + for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch); + SBI != SBE; ++SBI) { + for (BasicBlock::iterator BBI = (*SBI)->begin(); + PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) { + + // Add a new PHI node to the prolog end block and add the + // appropriate incoming values. + PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr", + PrologEnd->getTerminator()); + // Adding a value to the new PHI node from the original loop preheader. + // This is the value that skips all the prolog code. + if (L->contains(PN)) { + NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH); + } else { + NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH); + } + Value *OrigVal = PN->getIncomingValueForBlock(Latch); + Value *V = OrigVal; + if (Instruction *I = dyn_cast<Instruction>(V)) { + if (L->contains(I)) { + V = LVMap[I]; + } + } + // Adding a value to the new PHI node from the last prolog block + // that was created. + NewPN->addIncoming(V, LastPrologBB); + + // Update the existing PHI node operand with the value from the + // new PHI node. How this is done depends on if the existing + // PHI node is in the original loop block, or the exit block. + if (L->contains(PN)) { + PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN); + } else { + PN->addIncoming(NewPN, PrologEnd); + } + } + } + + // Create a branch around the orignal loop, which is taken if the + // trip count is less than the unroll factor. 
+ Instruction *InsertPt = PrologEnd->getTerminator(); + Instruction *BrLoopExit = + new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, + ConstantInt::get(TripCount->getType(), Count)); + BasicBlock *Exit = L->getUniqueExitBlock(); + assert(Exit != 0 && "Loop must have a single exit block only"); + // Split the exit to maintain loop canonicalization guarantees + SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit)); + if (!Exit->isLandingPad()) { + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa", + P, NewBBs); + } + // Add the branch to the exit block (around the unrolled loop) + BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt); + InsertPt->eraseFromParent(); +} + +/// Create a clone of the blocks in a loop and connect them together. +/// This function doesn't create a clone of the loop structure. +/// +/// There are two value maps that are defined and used. VMap is +/// for the values in the current loop instance. LVMap contains +/// the values from the last loop instance. We need the LVMap values +/// to update the inital values for the current loop instance. +/// +static void CloneLoopBlocks(Loop *L, + bool FirstCopy, + BasicBlock *InsertTop, + BasicBlock *InsertBot, + std::vector<BasicBlock *> &NewBlocks, + LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, + ValueToValueMapTy &LVMap, + LoopInfo *LI) { + + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F); + NewBlocks.push_back(NewBB); + + if (Loop *ParentLoop = L->getParentLoop()) + ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + + VMap[*BB] = NewBB; + if (Header == *BB) { + // For the first block, add a CFG connection to this newly + // created block + InsertTop->getTerminator()->setSuccessor(0, NewBB); + + // Change the incoming values to the ones defined in the + // previously cloned loop. + for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) { + PHINode *NewPHI = cast<PHINode>(VMap[I]); + if (FirstCopy) { + // We replace the first phi node with the value from the preheader + VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); + NewBB->getInstList().erase(NewPHI); + } else { + // Update VMap with values from the previous block + unsigned idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + if (Instruction *I = dyn_cast<Instruction>(InVal)) + if (L->contains(I)) + InVal = LVMap[InVal]; + NewPHI->setIncomingValue(idx, InVal); + NewPHI->setIncomingBlock(idx, InsertTop); + } + } + } + + if (Latch == *BB) { + VMap.erase((*BB)->getTerminator()); + NewBB->getTerminator()->eraseFromParent(); + BranchInst::Create(InsertBot, NewBB); + } + } + // LastValueMap is updated with the values for the current loop + // which are used the next time this function is called. 
+  for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+       VI != VE; ++VI) {
+    LVMap[VI->first] = VI->second;
+  }
+}
+
+/// Insert code in the prolog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is the total number
+/// of loop bodies in the loop after unrolling.  (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We also assume that the loop unroll factor is a power of two.  So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
+/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+///    extraiters = tripcount % loopfactor
+///    if (extraiters == 0) jump Loop:
+///    if (extraiters == loopfactor-1) jump L1
+///    if (extraiters == loopfactor-2) jump L2
+///    ...
+///    L1:  LoopBody;
+///    L2:  LoopBody;
+///    ...
+///    if tripcount < loopfactor jump End
+///    Loop:
+///    ...
+///    End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+                                   LPPassManager *LPM) {
+  // For now, only unroll loops that contain a single exit.
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  if (ExitingBlocks.size() > 1)
+    return false;
+
+  // Make sure the loop is in canonical form, and that there is only a single
+  // exit block.
+  if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+    return false;
+
+  // Use Scalar Evolution to compute the trip count.  This allows more loops
+  // to be unrolled than relying on induction var simplification.
+  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+  if (SE == 0)
+    return false;
+
+  // Only unroll loops with a computable trip count, and the trip count needs
+  // to be an integer value (allowing a pointer type is a TODO item).
+  const SCEV *BECount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+    return false;
+
+  // Add 1 since the backedge count doesn't include the first loop iteration.
+  const SCEV *TripCountSC =
+    SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+  if (isa<SCEVCouldNotCompute>(TripCountSC))
+    return false;
+
+  // We only handle cases when the unroll factor is a power of 2.
+  // Count is the loop unroll factor, the number of extra copies added + 1.
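+  // The check below is the usual power-of-two bit trick: a power of two has
+  // exactly one set bit, so Count & (Count-1) clears it to zero.  For
+  // example, 8 & 7 == 0, while 6 & 5 == 4, so 6 is rejected.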
+  if ((Count & (Count-1)) != 0)
+    return false;
+
+  // If this loop is nested, then the loop unroller changes the code in the
+  // parent loop, so the Scalar Evolution pass needs to be run again.
+  if (Loop *ParentLoop = L->getParentLoop())
+    SE->forgetLoop(ParentLoop);
+
+  BasicBlock *PH = L->getLoopPreheader();
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *Latch = L->getLoopLatch();
+  // Split the original preheader twice: once for the end of the prolog code
+  // and once for the new loop preheader.
+  BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+  // Compute the number of extra iterations required, which is:
+  //   extra iterations = run-time trip count % loop unroll factor (Count)
+  SCEVExpander Expander(*SE, "loop-unroll");
+  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+                                            PreHeaderBR);
+  Type *CountTy = TripCount->getType();
+  BinaryOperator *ModVal =
+    BinaryOperator::CreateURem(TripCount,
+                               ConstantInt::get(CountTy, Count),
+                               "xtraiter");
+  ModVal->insertBefore(PreHeaderBR);
+
+  // If there are no extra iterations, jump straight to the unrolled loop.
+  Value *BranchVal = new ICmpInst(PreHeaderBR,
+                                  ICmpInst::ICMP_NE, ModVal,
+                                  ConstantInt::get(CountTy, 0), "lcmp");
+  // Branch to either the extra iterations or the unrolled loop.
+  // We will fix up the true branch label when adding loop body copies.
+  BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+  assert(PreHeaderBR->isUnconditional() &&
+         PreHeaderBR->getSuccessor(0) == PEnd &&
+         "CFG edges in Preheader are not correct");
+  PreHeaderBR->eraseFromParent();
+
+  ValueToValueMapTy LVMap;
+  Function *F = Header->getParent();
+  // These variables are used to update the CFG links in each iteration.
+  BasicBlock *CompareBB = 0;
+  BasicBlock *LastLoopBB = PH;
+  // Get an ordered list of blocks in the loop to help with the ordering of
+  // the cloned blocks in the prolog code.
+  LoopBlocksDFS LoopBlocks(L);
+  LoopBlocks.perform(LI);
+
+  //
+  // For each extra loop iteration, create a copy of the loop's basic blocks
+  // and generate a condition that branches to the copy depending on the
+  // number of 'left over' iterations.
+  //
+  for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+    std::vector<BasicBlock*> NewBlocks;
+    ValueToValueMapTy VMap;
+
+    // Clone all the basic blocks in the loop; we don't clone the loop
+    // structure.  This call adds the appropriate CFG connections.
+    CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+                    LoopBlocks, VMap, LVMap, LI);
+    LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+    // Insert the cloned blocks into the function just before the original
+    // loop.
+    F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+                                  NewBlocks[0], F->end());
+
+    // Generate the code for the comparison which determines if the loop
+    // prolog code needs to be executed.
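+    // Sketch (pseudo-code, hypothetical labels) of the dispatch built by the
+    // iterations below for Count == 4; xtraiter is known non-zero here:
+    //   cmp1: if (xtraiter == 1) goto copy1; else goto cmp2
+    //   cmp2: if (xtraiter == 2) goto copy2; else goto copy3
+    //   copy3 -> copy2 -> copy1 -> prolog end; entering at copyN thus
+    //   executes N prolog iterations before the unrolled loop.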
+    if (leftOverIters == Count-1) {
+      // There is no compare block for the fall-through case of the last
+      // left-over iteration.
+      CompareBB = NewBlocks[0];
+    } else {
+      // Create a new block for the comparison.
+      BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+                                             F, CompareBB);
+      if (Loop *ParentLoop = L->getParentLoop()) {
+        // Add the new block to the parent loop, if needed.
+        ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+      }
+
+      // Compare against the extra-iteration value, then branch.
+      Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+                                      ConstantInt::get(CountTy, leftOverIters),
+                                      "un.tmp");
+      // Branch to either the extra iterations or the unrolled loop.
+      BranchInst::Create(NewBlocks[0], CompareBB,
+                         BranchVal, NewBB);
+      CompareBB = NewBB;
+      PH->getTerminator()->setSuccessor(0, NewBB);
+      VMap[NewPH] = CompareBB;
+    }
+
+    // Rewrite the cloned instruction operands to use the values
+    // created when the blocks were cloned.
+    for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+           E = NewBlocks[i]->end(); I != E; ++I) {
+        RemapInstruction(I, VMap,
+                         RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+      }
+    }
+  }
+
+  // Connect the prolog code to the original loop and update the
+  // PHI nodes.
+  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+                LPM->getAsPass());
+  NumRuntimeUnrolled++;
+  return true;
+}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 5e294a3..8491c55 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -19,7 +19,8 @@
 using namespace llvm;
 
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+static void appendToGlobalArray(const char *Array,
+                                Module &M, Function *F, int Priority) {
   IRBuilder<> IRB(M.getContext());
   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
   StructType *Ty = StructType::get(
@@ -31,7 +32,7 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
   // Get the current set of static global constructors and add the new ctor
   // to the list.
   SmallVector<Constant *, 16> CurrentCtors;
-  if (GlobalVariable * GVCtor = M.getNamedGlobal("llvm.global_ctors")) {
+  if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
     if (Constant *Init = GVCtor->getInitializer()) {
       unsigned n = Init->getNumOperands();
       CurrentCtors.reserve(n + 1);
@@ -51,6 +52,13 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
   // Create the new global variable and replace all uses of
   // the old global variable with the new one.
   (void)new GlobalVariable(M, NewInit->getType(), false,
-                           GlobalValue::AppendingLinkage, NewInit,
-                           "llvm.global_ctors");
+                           GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+  appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+  appendToGlobalArray("llvm.global_dtors", M, F, Priority);
 }
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e942..e8f4285 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -590,7 +590,7 @@ void PromoteMem2Reg::run() {
     PHINode *PN = I->second;
 
     // If this PHI node merges one value and/or undefs, get the value.
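+    // Note: SimplifyInstruction now takes a TargetLibraryInfo argument as its
+    // third parameter; the null passed below keeps the previous behavior.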
- if (Value *V = SimplifyInstruction(PN, 0, &DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index b8c3ab4..bf2cb49 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -257,7 +257,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!I->isSafeToSpeculativelyExecute()) + if (!isSafeToSpeculativelyExecute(I)) return false; unsigned Cost = 0; @@ -1487,7 +1487,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *BonusInst = 0; if (&*FrontIt != Cond && FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond && - FrontIt->isSafeToSpeculativelyExecute()) { + isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index ac005f9..81eb9e0 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -39,12 +40,14 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } /// runOnFunction - Remove instructions that simplify. bool runOnFunction(Function &F) { const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -60,7 +63,7 @@ namespace { continue; // Don't waste time simplifying unused instructions. if (!I->use_empty()) - if (Value *V = SimplifyInstruction(I, TD, DT)) { + if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { // Mark all uses for resimplification next time round the loop. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) @@ -84,8 +87,11 @@ namespace { } char InstSimplifier::ID = 0; -INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions", - false, false) +INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstSimplifier, "instsimplify", + "Remove redundant instructions", false, false) char &llvm::InstructionSimplifierID = InstSimplifier::ID; // Public interface to the simplify instructions pass. diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index d7863f5..4fb5fd3 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -2110,3 +2110,6 @@ void Type::dump() const { print(dbgs()); } // Module::dump() - Allow printing of Modules from the debugger. void Module::dump() const { print(dbgs(), 0); } + +// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger. 
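+// (Illustrative use: from a debugger such as gdb, "call NMD->dump()" on a
+// NamedMDNode* prints the node via this hook.)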
+void NamedMDNode::dump() const { print(dbgs(), 0); } diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index b849d3e..59424f9 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -38,105 +38,21 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return false; Name = Name.substr(5); // Strip off "llvm." - FunctionType *FTy = F->getFunctionType(); - Module *M = F->getParent(); - switch (Name[0]) { default: break; - case 'a': - if (Name.startswith("atomic.cmp.swap") || - Name.startswith("atomic.swap") || - Name.startswith("atomic.load.add") || - Name.startswith("atomic.load.sub") || - Name.startswith("atomic.load.and") || - Name.startswith("atomic.load.nand") || - Name.startswith("atomic.load.or") || - Name.startswith("atomic.load.xor") || - Name.startswith("atomic.load.max") || - Name.startswith("atomic.load.min") || - Name.startswith("atomic.load.umax") || - Name.startswith("atomic.load.umin")) - return true; - case 'i': - // This upgrades the old llvm.init.trampoline to the new - // llvm.init.trampoline and llvm.adjust.trampoline pair. - if (Name == "init.trampoline") { - // The new llvm.init.trampoline returns nothing. - if (FTy->getReturnType()->isVoidTy()) - break; - - assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!"); - - // Change the name of the old intrinsic so that we can play with its type. - std::string NameTmp = F->getName(); - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction( - NameTmp, - Type::getVoidTy(M->getContext()), - FTy->getParamType(0), FTy->getParamType(1), - FTy->getParamType(2), (Type *)0)); + case 'c': { + if (Name.startswith("ctlz.") && F->arg_size() == 1) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + F->arg_begin()->getType()); return true; } - case 'm': - if (Name == "memory.barrier") - return true; - case 'p': - // This upgrades the llvm.prefetch intrinsic to accept one more parameter, - // which is a instruction / data cache identifier. The old version only - // implicitly accepted the data version. - if (Name == "prefetch") { - // Don't do anything if it has the correct number of arguments already - if (FTy->getNumParams() == 4) - break; - - assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!"); - // We first need to change the name of the old (bad) intrinsic, because - // its type is incorrect, but we cannot overload that name. We - // arbitrarily unique it here allowing us to construct a correctly named - // and typed function below. - std::string NameTmp = F->getName(); - F->setName(""); - NewFn = cast<Function>(M->getOrInsertFunction(NameTmp, - FTy->getReturnType(), - FTy->getParamType(0), - FTy->getParamType(1), - FTy->getParamType(2), - FTy->getParamType(2), - (Type*)0)); + if (Name.startswith("cttz.") && F->arg_size() == 1) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz, + F->arg_begin()->getType()); return true; } - - break; - case 'x': { - const char *NewFnName = NULL; - // This fixes the poorly named crc32 intrinsics. 
- if (Name == "x86.sse42.crc32.8") - NewFnName = "llvm.x86.sse42.crc32.32.8"; - else if (Name == "x86.sse42.crc32.16") - NewFnName = "llvm.x86.sse42.crc32.32.16"; - else if (Name == "x86.sse42.crc32.32") - NewFnName = "llvm.x86.sse42.crc32.32.32"; - else if (Name == "x86.sse42.crc64.8") - NewFnName = "llvm.x86.sse42.crc32.64.8"; - else if (Name == "x86.sse42.crc64.64") - NewFnName = "llvm.x86.sse42.crc32.64.64"; - - if (NewFnName) { - F->setName(NewFnName); - NewFn = F; - return true; - } - - // Calls to these instructions are transformed into unaligned loads. - if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" || - Name == "x86.sse2.loadu.pd") - return true; - - // Calls to these instructions are transformed into nontemporal stores. - if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" || - Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i") - return true; - break; } } @@ -169,190 +85,27 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { // upgraded intrinsic. All argument and return casting must be provided in // order to seamlessly integrate with existing context. void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { - Function *F = CI->getCalledFunction(); - LLVMContext &C = CI->getContext(); - ImmutableCallSite CS(CI); - - assert(F && "CallInst has no function associated with it."); - - if (!NewFn) { - if (F->getName() == "llvm.x86.sse.loadu.ps" || - F->getName() == "llvm.x86.sse2.loadu.dq" || - F->getName() == "llvm.x86.sse2.loadu.pd") { - // Convert to a native, unaligned load. - Type *VecTy = CI->getType(); - Type *IntTy = IntegerType::get(C, 128); - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - - Value *BC = Builder.CreateBitCast(CI->getArgOperand(0), - PointerType::getUnqual(IntTy), - "cast"); - LoadInst *LI = Builder.CreateLoad(BC, CI->getName()); - LI->setAlignment(1); // Unaligned load. - BC = Builder.CreateBitCast(LI, VecTy, "new.cast"); - - // Fix up all the uses with our new load. - if (!CI->use_empty()) - CI->replaceAllUsesWith(BC); - - // Remove intrinsic. - CI->eraseFromParent(); - } else if (F->getName() == "llvm.x86.sse.movnt.ps" || - F->getName() == "llvm.x86.sse2.movnt.dq" || - F->getName() == "llvm.x86.sse2.movnt.pd" || - F->getName() == "llvm.x86.sse2.movnt.i") { - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - - Module *M = F->getParent(); - SmallVector<Value *, 1> Elts; - Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); - MDNode *Node = MDNode::get(C, Elts); - - Value *Arg0 = CI->getArgOperand(0); - Value *Arg1 = CI->getArgOperand(1); - - // Convert the type of the pointer to a pointer to the stored type. - Value *BC = Builder.CreateBitCast(Arg0, - PointerType::getUnqual(Arg1->getType()), - "cast"); - StoreInst *SI = Builder.CreateStore(Arg1, BC); - SI->setMetadata(M->getMDKindID("nontemporal"), Node); - SI->setAlignment(16); - - // Remove intrinsic. - CI->eraseFromParent(); - } else if (F->getName().startswith("llvm.atomic.cmp.swap")) { - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0), - CI->getArgOperand(1), - CI->getArgOperand(2), - Monotonic); - - // Replace intrinsic. 
- Val->takeName(CI); - if (!CI->use_empty()) - CI->replaceAllUsesWith(Val); - CI->eraseFromParent(); - } else if (F->getName().startswith("llvm.atomic")) { - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - - AtomicRMWInst::BinOp Op; - if (F->getName().startswith("llvm.atomic.swap")) - Op = AtomicRMWInst::Xchg; - else if (F->getName().startswith("llvm.atomic.load.add")) - Op = AtomicRMWInst::Add; - else if (F->getName().startswith("llvm.atomic.load.sub")) - Op = AtomicRMWInst::Sub; - else if (F->getName().startswith("llvm.atomic.load.and")) - Op = AtomicRMWInst::And; - else if (F->getName().startswith("llvm.atomic.load.nand")) - Op = AtomicRMWInst::Nand; - else if (F->getName().startswith("llvm.atomic.load.or")) - Op = AtomicRMWInst::Or; - else if (F->getName().startswith("llvm.atomic.load.xor")) - Op = AtomicRMWInst::Xor; - else if (F->getName().startswith("llvm.atomic.load.max")) - Op = AtomicRMWInst::Max; - else if (F->getName().startswith("llvm.atomic.load.min")) - Op = AtomicRMWInst::Min; - else if (F->getName().startswith("llvm.atomic.load.umax")) - Op = AtomicRMWInst::UMax; - else if (F->getName().startswith("llvm.atomic.load.umin")) - Op = AtomicRMWInst::UMin; - else - llvm_unreachable("Unknown atomic"); - - Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0), - CI->getArgOperand(1), - Monotonic); - - // Replace intrinsic. - Val->takeName(CI); - if (!CI->use_empty()) - CI->replaceAllUsesWith(Val); - CI->eraseFromParent(); - } else if (F->getName() == "llvm.memory.barrier") { - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - - // Note that this conversion ignores the "device" bit; it was not really - // well-defined, and got abused because nobody paid enough attention to - // get it right. In practice, this probably doesn't matter; application - // code generally doesn't need anything stronger than - // SequentiallyConsistent (and realistically, SequentiallyConsistent - // is lowered to a strong enough barrier for almost anything). - - if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue()) - Builder.CreateFence(SequentiallyConsistent); - else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue()) - Builder.CreateFence(Release); - else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue()) - Builder.CreateFence(Acquire); - else - Builder.CreateFence(AcquireRelease); + assert(CI->getCalledFunction() && "Intrinsic call is not direct?"); + if (!NewFn) return; - // Remove intrinsic. - CI->eraseFromParent(); - } else { - llvm_unreachable("Unknown function for CallInst upgrade."); - } - return; - } + LLVMContext &C = CI->getContext(); + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); switch (NewFn->getIntrinsicID()) { - case Intrinsic::prefetch: { - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI->getParent(), CI); - llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext()); - - // Add the extra "data cache" argument - Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), - llvm::ConstantInt::get(I32Ty, 1) }; - CallInst *NewCI = CallInst::Create(NewFn, Operands, - CI->getName(), CI); - NewCI->setTailCall(CI->isTailCall()); - NewCI->setCallingConv(CI->getCallingConv()); - // Handle any uses of the old CallInst. - if (!CI->use_empty()) - // Replace all uses of the old call with the new cast which has the - // correct type. - CI->replaceAllUsesWith(NewCI); - - // Clean up the old call now that it has been completely upgraded. 
- CI->eraseFromParent(); - break; - } - case Intrinsic::init_trampoline: { - - // Transform - // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z) - // to - // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z) - // %tramp = call i8* llvm.adjust.trampoline (i8* %x) - - Function *AdjustTrampolineFn = - cast<Function>(Intrinsic::getDeclaration(F->getParent(), - Intrinsic::adjust_trampoline)); - - IRBuilder<> Builder(C); - Builder.SetInsertPoint(CI); - - Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2)); - - CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn, - CI->getArgOperand(0), - CI->getName()); - if (!CI->use_empty()) - CI->replaceAllUsesWith(AdjustCall); + default: + llvm_unreachable("Unknown function for CallInst upgrade."); + + case Intrinsic::ctlz: + case Intrinsic::cttz: + assert(CI->getNumArgOperands() == 1 && + "Mismatch between function args and call args"); + StringRef Name = CI->getName(); + CI->setName(Name + ".old"); + CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0), + Builder.getFalse(), Name)); CI->eraseFromParent(); - break; - } + return; } } @@ -378,291 +131,3 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) { } } -/// This function strips all debug info intrinsics, except for llvm.dbg.declare. -/// If an llvm.dbg.declare intrinsic is invalid, then this function simply -/// strips that use. -void llvm::CheckDebugInfoIntrinsics(Module *M) { - if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) { - while (!FuncStart->use_empty()) - cast<CallInst>(FuncStart->use_back())->eraseFromParent(); - FuncStart->eraseFromParent(); - } - - if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) { - while (!StopPoint->use_empty()) - cast<CallInst>(StopPoint->use_back())->eraseFromParent(); - StopPoint->eraseFromParent(); - } - - if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) { - while (!RegionStart->use_empty()) - cast<CallInst>(RegionStart->use_back())->eraseFromParent(); - RegionStart->eraseFromParent(); - } - - if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) { - while (!RegionEnd->use_empty()) - cast<CallInst>(RegionEnd->use_back())->eraseFromParent(); - RegionEnd->eraseFromParent(); - } - - if (Function *Declare = M->getFunction("llvm.dbg.declare")) { - if (!Declare->use_empty()) { - DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back()); - if (!isa<MDNode>(DDI->getArgOperand(0)) || - !isa<MDNode>(DDI->getArgOperand(1))) { - while (!Declare->use_empty()) { - CallInst *CI = cast<CallInst>(Declare->use_back()); - CI->eraseFromParent(); - } - Declare->eraseFromParent(); - } - } - } -} - -/// FindExnAndSelIntrinsics - Find the eh_exception and eh_selector intrinsic -/// calls reachable from the unwind basic block. 
-static void FindExnAndSelIntrinsics(BasicBlock *BB, CallInst *&Exn, - CallInst *&Sel, - SmallPtrSet<BasicBlock*, 8> &Visited) { - if (!Visited.insert(BB)) return; - - for (BasicBlock::iterator - I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { - switch (CI->getCalledFunction()->getIntrinsicID()) { - default: break; - case Intrinsic::eh_exception: - assert(!Exn && "Found more than one eh.exception call!"); - Exn = CI; - break; - case Intrinsic::eh_selector: - assert(!Sel && "Found more than one eh.selector call!"); - Sel = CI; - break; - } - - if (Exn && Sel) return; - } - } - - if (Exn && Sel) return; - - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - FindExnAndSelIntrinsics(*I, Exn, Sel, Visited); - if (Exn && Sel) return; - } -} - -/// TransferClausesToLandingPadInst - Transfer the exception handling clauses -/// from the eh_selector call to the new landingpad instruction. -static void TransferClausesToLandingPadInst(LandingPadInst *LPI, - CallInst *EHSel) { - LLVMContext &Context = LPI->getContext(); - unsigned N = EHSel->getNumArgOperands(); - - for (unsigned i = N - 1; i > 1; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(EHSel->getArgOperand(i))){ - unsigned FilterLength = CI->getZExtValue(); - unsigned FirstCatch = i + FilterLength + !FilterLength; - assert(FirstCatch <= N && "Invalid filter length"); - - if (FirstCatch < N) - for (unsigned j = FirstCatch; j < N; ++j) { - Value *Val = EHSel->getArgOperand(j); - if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") { - LPI->addClause(EHSel->getArgOperand(j)); - } else { - GlobalVariable *GV = cast<GlobalVariable>(Val); - LPI->addClause(GV->getInitializer()); - } - } - - if (!FilterLength) { - // Cleanup. - LPI->setCleanup(true); - } else { - // Filter. - SmallVector<Constant *, 4> TyInfo; - TyInfo.reserve(FilterLength - 1); - for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(cast<Constant>(EHSel->getArgOperand(j))); - ArrayType *AType = - ArrayType::get(!TyInfo.empty() ? TyInfo[0]->getType() : - PointerType::getUnqual(Type::getInt8Ty(Context)), - TyInfo.size()); - LPI->addClause(ConstantArray::get(AType, TyInfo)); - } - - N = i; - } - } - - if (N > 2) - for (unsigned j = 2; j < N; ++j) { - Value *Val = EHSel->getArgOperand(j); - if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") { - LPI->addClause(EHSel->getArgOperand(j)); - } else { - GlobalVariable *GV = cast<GlobalVariable>(Val); - LPI->addClause(GV->getInitializer()); - } - } -} - -/// This function upgrades the old pre-3.0 exception handling system to the new -/// one. N.B. This will be removed in 3.1. -void llvm::UpgradeExceptionHandling(Module *M) { - Function *EHException = M->getFunction("llvm.eh.exception"); - Function *EHSelector = M->getFunction("llvm.eh.selector"); - if (!EHException || !EHSelector) - return; - - LLVMContext &Context = M->getContext(); - Type *ExnTy = PointerType::getUnqual(Type::getInt8Ty(Context)); - Type *SelTy = Type::getInt32Ty(Context); - Type *LPadSlotTy = StructType::get(ExnTy, SelTy, NULL); - - // This map links the invoke instruction with the eh.exception and eh.selector - // calls associated with it. 
- DenseMap<InvokeInst*, std::pair<Value*, Value*> > InvokeToIntrinsicsMap; - for (Module::iterator - I = M->begin(), E = M->end(); I != E; ++I) { - Function &F = *I; - - for (Function::iterator - II = F.begin(), IE = F.end(); II != IE; ++II) { - BasicBlock *BB = &*II; - InvokeInst *Inst = dyn_cast<InvokeInst>(BB->getTerminator()); - if (!Inst) continue; - BasicBlock *UnwindDest = Inst->getUnwindDest(); - if (UnwindDest->isLandingPad()) continue; // Already converted. - - SmallPtrSet<BasicBlock*, 8> Visited; - CallInst *Exn = 0; - CallInst *Sel = 0; - FindExnAndSelIntrinsics(UnwindDest, Exn, Sel, Visited); - assert(Exn && Sel && "Cannot find eh.exception and eh.selector calls!"); - InvokeToIntrinsicsMap[Inst] = std::make_pair(Exn, Sel); - } - } - - // This map stores the slots where the exception object and selector value are - // stored within a function. - DenseMap<Function*, std::pair<Value*, Value*> > FnToLPadSlotMap; - SmallPtrSet<Instruction*, 32> DeadInsts; - for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator - I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end(); - I != E; ++I) { - InvokeInst *Invoke = I->first; - BasicBlock *UnwindDest = Invoke->getUnwindDest(); - Function *F = UnwindDest->getParent(); - std::pair<Value*, Value*> EHIntrinsics = I->second; - CallInst *Exn = cast<CallInst>(EHIntrinsics.first); - CallInst *Sel = cast<CallInst>(EHIntrinsics.second); - - // Store the exception object and selector value in the entry block. - Value *ExnSlot = 0; - Value *SelSlot = 0; - if (!FnToLPadSlotMap[F].first) { - BasicBlock *Entry = &F->front(); - ExnSlot = new AllocaInst(ExnTy, "exn", Entry->getTerminator()); - SelSlot = new AllocaInst(SelTy, "sel", Entry->getTerminator()); - FnToLPadSlotMap[F] = std::make_pair(ExnSlot, SelSlot); - } else { - ExnSlot = FnToLPadSlotMap[F].first; - SelSlot = FnToLPadSlotMap[F].second; - } - - if (!UnwindDest->getSinglePredecessor()) { - // The unwind destination doesn't have a single predecessor. Create an - // unwind destination which has only one predecessor. - BasicBlock *NewBB = BasicBlock::Create(Context, "new.lpad", - UnwindDest->getParent()); - BranchInst::Create(UnwindDest, NewBB); - Invoke->setUnwindDest(NewBB); - - // Fix up any PHIs in the original unwind destination block. - for (BasicBlock::iterator - II = UnwindDest->begin(); isa<PHINode>(II); ++II) { - PHINode *PN = cast<PHINode>(II); - int Idx = PN->getBasicBlockIndex(Invoke->getParent()); - if (Idx == -1) continue; - PN->setIncomingBlock(Idx, NewBB); - } - - UnwindDest = NewBB; - } - - IRBuilder<> Builder(Context); - Builder.SetInsertPoint(UnwindDest, UnwindDest->getFirstInsertionPt()); - - Value *PersFn = Sel->getArgOperand(1); - LandingPadInst *LPI = Builder.CreateLandingPad(LPadSlotTy, PersFn, 0); - Value *LPExn = Builder.CreateExtractValue(LPI, 0); - Value *LPSel = Builder.CreateExtractValue(LPI, 1); - Builder.CreateStore(LPExn, ExnSlot); - Builder.CreateStore(LPSel, SelSlot); - - TransferClausesToLandingPadInst(LPI, Sel); - - DeadInsts.insert(Exn); - DeadInsts.insert(Sel); - } - - // Replace the old intrinsic calls with the values from the landingpad - // instruction(s). These values were stored in allocas for us to use here. 
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator - I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end(); - I != E; ++I) { - std::pair<Value*, Value*> EHIntrinsics = I->second; - CallInst *Exn = cast<CallInst>(EHIntrinsics.first); - CallInst *Sel = cast<CallInst>(EHIntrinsics.second); - BasicBlock *Parent = Exn->getParent(); - - std::pair<Value*,Value*> ExnSelSlots = FnToLPadSlotMap[Parent->getParent()]; - - IRBuilder<> Builder(Context); - Builder.SetInsertPoint(Parent, Exn); - LoadInst *LPExn = Builder.CreateLoad(ExnSelSlots.first, "exn.load"); - LoadInst *LPSel = Builder.CreateLoad(ExnSelSlots.second, "sel.load"); - - Exn->replaceAllUsesWith(LPExn); - Sel->replaceAllUsesWith(LPSel); - } - - // Remove the dead instructions. - for (SmallPtrSet<Instruction*, 32>::iterator - I = DeadInsts.begin(), E = DeadInsts.end(); I != E; ++I) { - Instruction *Inst = *I; - Inst->eraseFromParent(); - } - - // Replace calls to "llvm.eh.resume" with the 'resume' instruction. Load the - // exception and selector values from the stored place. - Function *EHResume = M->getFunction("llvm.eh.resume"); - if (!EHResume) return; - - while (!EHResume->use_empty()) { - CallInst *Resume = cast<CallInst>(EHResume->use_back()); - BasicBlock *BB = Resume->getParent(); - - IRBuilder<> Builder(Context); - Builder.SetInsertPoint(BB, Resume); - - Value *LPadVal = - Builder.CreateInsertValue(UndefValue::get(LPadSlotTy), - Resume->getArgOperand(0), 0, "lpad.val"); - LPadVal = Builder.CreateInsertValue(LPadVal, Resume->getArgOperand(1), - 1, "lpad.val"); - Builder.CreateResume(LPadVal); - - // Remove all instructions after the 'resume.' - BasicBlock::iterator I = Resume; - while (I != BB->end()) { - Instruction *Inst = &*I++; - Inst->eraseFromParent(); - } - } -} diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 0404297..99eeba1 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -37,5 +37,3 @@ add_llvm_library(LLVMCore ValueTypes.cpp Verifier.cpp ) - -add_llvm_library_dependencies(LLVMCore LLVMSupport) diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 30bae71..d1a9e7a 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -2209,7 +2209,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, I != E; ++I) LastTy = *I; - if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) { + if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) { SmallVector<Value*, 16> NewIndices; NewIndices.reserve(Idxs.size() + CE->getNumOperands()); for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i) diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index cd94da1..a148912 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -1398,14 +1398,22 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) { } Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) { - assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer"); - assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral"); + assert(C->getType()->getScalarType()->isPointerTy() && + "PtrToInt source must be pointer or pointer vector"); + assert(DstTy->getScalarType()->isIntegerTy() && + "PtrToInt destination must be integer or integer vector"); + assert(C->getType()->getNumElements() == DstTy->getNumElements() && + "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } Constant 
*ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) { - assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral"); - assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer"); + assert(C->getType()->getScalarType()->isIntegerTy() && + "IntToPtr source must be integer or integer vector"); + assert(DstTy->getScalarType()->isPointerTy() && + "IntToPtr destination must be a pointer or pointer vector"); + assert(C->getType()->getNumElements() == DstTy->getNumElements() && + "Invalid cast between a different number of vector elements"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 73191c1..8c8fbf9 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -391,59 +391,6 @@ bool Instruction::isCommutative(unsigned op) { } } -bool Instruction::isSafeToSpeculativelyExecute() const { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (Constant *C = dyn_cast<Constant>(getOperand(i))) - if (C->canTrap()) - return false; - - switch (getOpcode()) { - default: - return true; - case UDiv: - case URem: { - // x / y is undefined if y == 0, but calcuations like x / 3 are safe. - ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1)); - return Op && !Op->isNullValue(); - } - case SDiv: - case SRem: { - // x / y is undefined if y == 0, and might be undefined if y == -1, - // but calcuations like x / 3 are safe. - ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1)); - return Op && !Op->isNullValue() && !Op->isAllOnesValue(); - } - case Load: { - const LoadInst *LI = cast<LoadInst>(this); - if (!LI->isUnordered()) - return false; - return LI->getPointerOperand()->isDereferenceablePointer(); - } - case Call: - return false; // The called function could have undefined behavior or - // side-effects. - // FIXME: We should special-case some intrinsics (bswap, - // overflow-checking arithmetic, etc.) - case VAArg: - case Alloca: - case Invoke: - case PHI: - case Store: - case Ret: - case Br: - case IndirectBr: - case Switch: - case Unwind: - case Unreachable: - case Fence: - case LandingPad: - case AtomicRMW: - case AtomicCmpXchg: - case Resume: - return false; // Misc instructions which have effects - } -} - Instruction *Instruction::clone() const { Instruction *New = clone_impl(); New->SubclassOptionalData = SubclassOptionalData; diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index c8dcdc8..4784f0c 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -1359,6 +1359,15 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) /// template <typename IndexTy> static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) { + if (Ptr->isVectorTy()) { + assert(IdxList.size() == 1 && + "GEP with vector pointers must have a single index"); + PointerType *PTy = dyn_cast<PointerType>( + cast<VectorType>(Ptr)->getElementType()); + assert(PTy && "Gep with invalid vector pointer found"); + return PTy->getElementType(); + } + PointerType *PTy = dyn_cast<PointerType>(Ptr); if (!PTy) return 0; // Type isn't a pointer type! Type *Agg = PTy->getElementType(); @@ -1366,7 +1375,7 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) { // Handle the special case of the empty set index set, which is always valid. if (IdxList.empty()) return Agg; - + // If there is at least one index, the top level type must be sized, otherwise // it cannot be 'stepped over'. 
if (!Agg->isSized()) @@ -1396,6 +1405,19 @@ Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) { return getIndexedTypeInternal(Ptr, IdxList); } +unsigned GetElementPtrInst::getAddressSpace(Value *Ptr) { + Type *Ty = Ptr->getType(); + + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) + Ty = VTy->getElementType(); + + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) + return PTy->getAddressSpace(); + + assert(false && "Invalid GEP pointer type"); + return 0; +} + /// hasAllZeroIndices - Return true if all of the indices of this GEP are /// zeros. If so, the result pointer and the first operand have the same /// value, just potentially different types. @@ -2005,6 +2027,8 @@ bool BinaryOperator::isExact() const { // CastInst Class //===----------------------------------------------------------------------===// +void CastInst::anchor() {} + // Just determine if this cast only deals with integral->integral conversion. bool CastInst::isIntegerCast() const { switch (getOpcode()) { @@ -2652,9 +2676,15 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() && SrcLength == DstLength; case Instruction::PtrToInt: - return SrcTy->isPointerTy() && DstTy->isIntegerTy(); + if (SrcTy->getNumElements() != DstTy->getNumElements()) + return false; + return SrcTy->getScalarType()->isPointerTy() && + DstTy->getScalarType()->isIntegerTy(); case Instruction::IntToPtr: - return SrcTy->isIntegerTy() && DstTy->isPointerTy(); + if (SrcTy->getNumElements() != DstTy->getNumElements()) + return false; + return SrcTy->getScalarType()->isIntegerTy() && + DstTy->getScalarType()->isPointerTy(); case Instruction::BitCast: // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. diff --git a/lib/VMCore/LLVMBuild.txt b/lib/VMCore/LLVMBuild.txt index 45f528e..bca8b2c 100644 --- a/lib/VMCore/LLVMBuild.txt +++ b/lib/VMCore/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = Core parent = Libraries required_libraries = Support - diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index ace4dc2..8debd7c 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -425,12 +425,12 @@ StringRef NamedMDNode::getName() const { // Instruction Metadata method implementations. // -void Instruction::setMetadata(const char *Kind, MDNode *Node) { +void Instruction::setMetadata(StringRef Kind, MDNode *Node) { if (Node == 0 && !hasMetadata()) return; setMetadata(getContext().getMDKindID(Kind), Node); } -MDNode *Instruction::getMetadataImpl(const char *Kind) const { +MDNode *Instruction::getMetadataImpl(StringRef Kind) const { return getMetadataImpl(getContext().getMDKindID(Kind)); } diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 10184bc..469defd 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -46,6 +46,14 @@ Type *Type::getScalarType() { return this; } +/// getNumElements - If this is a vector type, return the number of elements, +/// otherwise return zero. +unsigned Type::getNumElements() { + if (VectorType *VTy = dyn_cast<VectorType>(this)) + return VTy->getNumElements(); + return 0; +} + /// isIntegerTy - Return true if this is an IntegerType of the specified width. 
bool Type::isIntegerTy(unsigned Bitwidth) const { return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; @@ -664,6 +672,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) { } bool VectorType::isValidElementType(Type *ElemTy) { + if (PointerType *PTy = dyn_cast<PointerType>(ElemTy)) + ElemTy = PTy->getElementType(); return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy(); } diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp index f01fa34..5f35ce4 100644 --- a/lib/VMCore/User.cpp +++ b/lib/VMCore/User.cpp @@ -17,6 +17,8 @@ namespace llvm { // User Class //===----------------------------------------------------------------------===// +void User::anchor() {} + // replaceUsesOfWith - Replaces all references to the "From" definition with // references to the "To" definition. // diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index 291df91..a5f1918 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -108,6 +108,19 @@ bool Value::hasNUsesOrMore(unsigned N) const { /// isUsedInBasicBlock - Return true if this value is used in the specified /// basic block. bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { + // Start by scanning over the instructions looking for a use before we start + // the expensive use iteration. + unsigned MaxBlockSize = 3; + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (std::find(I->op_begin(), I->op_end(), this) != I->op_end()) + return true; + if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator + break; + } + + if (MaxBlockSize != 0) // We scanned the entire block and found no use. + return false; + for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { const Instruction *User = dyn_cast<Instruction>(*I); if (User && User->getParent() == BB) diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 9564b7d..003de44 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -1035,8 +1035,19 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I); - Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I); + Assert1(SrcTy->getScalarType()->isPointerTy(), + "PtrToInt source must be pointer", &I); + Assert1(DestTy->getScalarType()->isIntegerTy(), + "PtrToInt result must be integral", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), + "PtrToInt type mismatch", &I); + + if (SrcTy->isVectorTy()) { + VectorType *VSrc = dyn_cast<VectorType>(SrcTy); + VectorType *VDest = dyn_cast<VectorType>(DestTy); + Assert1(VSrc->getNumElements() == VDest->getNumElements(), + "PtrToInt Vector width mismatch", &I); + } visitInstruction(I); } @@ -1046,9 +1057,18 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I); - Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I); - + Assert1(SrcTy->getScalarType()->isIntegerTy(), + "IntToPtr source must be an integral", &I); + Assert1(DestTy->getScalarType()->isPointerTy(), + "IntToPtr result must be a pointer",&I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), + "IntToPtr type mismatch", &I); + if (SrcTy->isVectorTy()) { + VectorType *VSrc = dyn_cast<VectorType>(SrcTy); + VectorType *VDest = dyn_cast<VectorType>(DestTy); + 
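+    // The element counts must match: e.g. casting <2 x i64> to <2 x i8*>
+    // verifies, while <2 x i64> to <4 x i8*> fails the width check below
+    // (illustrative types).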
+    Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+            "IntToPtr Vector width mismatch", &I);
+  }
   visitInstruction(I);
 }
@@ -1245,7 +1265,7 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
   Assert1(Op0Ty == Op1Ty,
           "Both operands to ICmp instruction are not of the same type!", &IC);
   // Check that the operands are the right type
-  Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
+  Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
          "Invalid operand types for ICmp instruction", &IC);
   // Check that the predicate is valid.
   Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
@@ -1295,17 +1315,43 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
 }
 
 void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
-  Assert1(cast<PointerType>(GEP.getOperand(0)->getType())
-          ->getElementType()->isSized(),
+  Type *TargetTy = GEP.getPointerOperandType();
+  if (VectorType *VTy = dyn_cast<VectorType>(TargetTy))
+    TargetTy = VTy->getElementType();
+
+  Assert1(dyn_cast<PointerType>(TargetTy),
+          "GEP base pointer is not a pointer or a vector of pointers", &GEP);
+  Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
           "GEP into unsized type!", &GEP);
-
+
   SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
   Type *ElTy =
-    GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs);
+    GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
   Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
-  Assert2(GEP.getType()->isPointerTy() &&
-          cast<PointerType>(GEP.getType())->getElementType() == ElTy,
-          "GEP is not of right type for indices!", &GEP, ElTy);
+
+  if (GEP.getPointerOperandType()->isPointerTy()) {
+    // Validate GEPs with scalar indices.
+    Assert2(GEP.getType()->isPointerTy() &&
+            cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+            "GEP is not of right type for indices!", &GEP, ElTy);
+  } else {
+    // Validate GEPs with a vector index.
+    Assert1(Idxs.size() == 1, "Invalid number of indices!", &GEP);
+    Value *Index = Idxs[0];
+    Type *IndexTy = Index->getType();
+    Assert1(IndexTy->isVectorTy(),
+            "Vector GEP must have vector indices!", &GEP);
+    Assert1(GEP.getType()->isVectorTy(),
+            "Vector GEP must return a vector value", &GEP);
+    Type *ElemPtr = cast<VectorType>(GEP.getType())->getElementType();
+    Assert1(ElemPtr->isPointerTy(),
+            "Vector GEP pointer operand is not a pointer!", &GEP);
+    unsigned IndexWidth = cast<VectorType>(IndexTy)->getNumElements();
+    unsigned GepWidth = cast<VectorType>(GEP.getType())->getNumElements();
+    Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+    Assert1(ElTy == cast<PointerType>(ElemPtr)->getElementType(),
+            "Vector GEP type does not match pointer type!", &GEP);
+  }
   visitInstruction(GEP);
 }
 
@@ -1642,6 +1688,12 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
   switch (ID) {
   default:
     break;
+  case Intrinsic::ctlz:  // llvm.ctlz
+  case Intrinsic::cttz:  // llvm.cttz
+    Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+            "is_zero_undef argument of bit counting intrinsics must be a "
+            "constant int", &CI);
+    break;
   case Intrinsic::dbg_declare: {  // llvm.dbg.declare
     Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
             "invalid llvm.dbg.declare intrinsic call 1", &CI);